diff --git a/.gitattributes b/.gitattributes index eea259e2..50d31d15 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,4 @@ docs/apidocs/* binary + +# Mark Jelly files as binary +*.jelly binary diff --git a/.gitignore b/.gitignore index 84b1b3b2..43e76666 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ ojdbc*.jar pom-oracle.xml dependency-reduced-pom.xml settings.xml +.classpath +.project \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index efe1765d..856fa4c9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,5 @@ stages: + - lint - unittests - mirror - release @@ -9,14 +10,10 @@ include: - project: 'rml/util/ci-templates' ref: main file: 'CHANGELOG.gitlab-ci.yml' - # Make a Github Release on new tags - - project: 'rml/util/ci-templates' - ref: main - file: 'Github-Release.gitlab-ci.yml' - # Push a Docker Image to Docker Hub on new tags - - project: 'rml/util/ci-templates' - ref: main - file: 'Docker-Hub.gitlab-ci.yml' +# # Push a Docker Image to Docker Hub on new tags +# - project: 'rml/util/ci-templates' +# ref: main +# file: 'Docker-Hub.gitlab-ci.yml' # Push a build to Maven Central on new tags - project: 'rml/util/ci-templates' ref: main @@ -44,7 +41,7 @@ cache: # Cancel pipeline if a newer pipeline is running default: - interruptible: true + interruptible: true ############################### # # @@ -54,160 +51,47 @@ default: General: stage: unittests - image: maven:3.5.0-jdk-8 - retry: 2 - services: - - postgres:10.4 - - name: mcr.microsoft.com/mssql/server:latest - alias: sqlserver - script: - - 'mvn $MAVEN_CLI_OPTS -Dtest=!Mapper_OracleDB_Test test' - except: - - master - - development - -Oracle DB: - stage: unittests - image: gitlab.ilabt.imec.be:4567/rml/util/mvn-oracle-docker:latest - script: - - '/entrypoint.sh' - except: - - master - - development - - tags # Gitlab CI bot cannot access Docker images - -Docker Build: - stage: unittests - image: docker:latest + image: + name: maven:3-eclipse-temurin-21 
+ pull_policy: if-not-present services: - - docker:19.03.12-dind - before_script: - - docker info + - name: docker:29-dind + # explicitly disable tls to avoid docker startup interruption + command: ["--tls=false"] + variables: + # Instruct Testcontainers to use the daemon of DinD. + DOCKER_HOST: "tcp://docker:2375" + # Instruct Docker not to start over TLS. + DOCKER_TLS_CERTDIR: "" + # Improve performance with overlayfs. + DOCKER_DRIVER: overlay2 script: - - docker build -t rmlmapper . + - 'mvn $MAVEN_CLI_OPTS -Dtest=$TEST test 2>&1 | tee $TEST.log' + artifacts: + when: always + paths: + - '$TEST.log' + parallel: + matrix: + - TEST: [ArgumentsTest, MapperCSVTest, MapperJSONTest, MapperMySQLTest, MapperPostgresR2RMLTest, MapperWoTTest, ArgumentsTestMySQLTest, MapperCSVWTest, MapperLDESTest, MapperODSTest, MapperOracleDBTest, MapperPostgresXMLTest, MapperXMLTest, CustomRMLFnOMapperCSVTest, MapperEXCELTest, MapperMappingFileURLTest, MapperSPARQLTest, MetadataTest, CustomRMLFnOMapperJSONTest, CustomRMLFnOMapperTest, MapperHTMLTest, MapperMySQLR2RMLTest, MapperPostgresCSVTest, MapperSQLServerTest, OptimizationsTest, R2RMLConverterTest, QuadTest, ReadmeTest, ReadmeFunctionTest, ConformerDetectionTest, MapperNewRMLCoreJSONTest, MapperNewRMLIOSourceTest, MapperNewRMLIOTargetTest, HttpRequestTargetTest, MapperCrossConcatSequenceTest] except: - master - development -############################### -# # -# Automated Releases # -# # -############################### - -# Bump version, create changelog, commit changes to master -# Only happens when manually clicked in the CI pipeline -# Thanks to: https://www.benjaminrancourt.ca/how-to-push-to-a-git-repository-from-a-gitlab-ci-pipeline -# Description -# This script allows to store the artefacts of a step into the current -# repository, to improve the efficiency of the next build process. - -# Set up this script -# 1. 
Create a new personal access token (https://gitlab.com/-/profile/personal_access_tokens) -# with the following scopes: -# - read_repository -# - write_repository -# 2. Inside Settings -> CI / CD -> Variables, create the following variables: -# -# GITLAB_TOKEN Personal access token previously created. XGE2-k445hd5fbs94v9d -# (masked) -# GITLAB_USERNAME Username associated with the personal access token. ranb2002 -# COMMIT_MESSAGE Commit message Automatic update from the weekly schedule - -# Other variables used by this script -# The following variables are defined automatically by GitLab CI. Thus, you -# don't need to override them. -# -# CI_COMMIT_SHA Commit SHA, to use a unique directory name. e46f153dd47ce5f3ca8c56be3fb5d55039853655 -# CI_DEFAULT_BRANCH Default branch. main -# CI_PROJECT_PATH Current project path. ranb2002/benjaminrancourt.ca -# CI_SERVER_HOST Hostname of the current GitLab instance. gitlab.com -# GITLAB_USER_EMAIL Email of the user used to commit the changes to the ranb2002@gitlab.com -# secondary repository. -# GITLAB_USER_NAME User name of the user used to commit the changes to Benjamin Rancourt -# the secondary repository. 
-# -Create Release: - image: - entrypoint: [''] - name: alpine/git:${GIT_VERSION} - stage: release - before_script: - # Dependencies - - apk add maven java-jdk - # Clone the repository via HTTPS inside a new directory - - git clone "https://${GITLAB_USERNAME}:${GITLAB_TOKEN_RMLMAPPER}@${CI_SERVER_HOST}/${CI_PROJECT_PATH}.git" "${CI_COMMIT_SHA}" - - # Set the displayed user with the commits that are about to be made - - git config --global user.email "${GIT_USER_EMAIL:-$GITLAB_USER_EMAIL}" - - git config --global user.name "${GIT_USER_NAME:-$GITLAB_USER_NAME}" - script: - # Using before_script would override .git:push template - # User has to set the RELEASE_TAG_NAME variable - - if [ -z "$RELEASE_TAG_NAME" ]; then exit 1; else echo "Creating release v$RELEASE_TAG_NAME"; fi - # Install dependencies - - apk add nodejs npm - - npm install -g changefrog - - # Update pom.xml - - head -n6 pom.xml | sed "//s/>.*$RELEASE_TAG_NAME pom_updated.xml - - tail -n +7 pom.xml >> pom_updated.xml - - mv pom_updated.xml pom.xml - # Update changelog. Changefrog does not like vX.X.X so drop 'v' - - changefrog -n "$RELEASE_TAG_NAME" - # Build a Jar - - mvn install -DskipTests=true - # Stage changes for commit to master - - cp -u pom.xml "${CI_COMMIT_SHA}/pom.xml" - - cp -u CHANGELOG.md "${CI_COMMIT_SHA}/CHANGELOG.md" - - cp -u buildNumber.properties "${CI_COMMIT_SHA}/buildNumber.properties" - after_script: - # Go to the new directory - - cd "${CI_COMMIT_SHA}" - - # Add all generated files to Git - - git add . 
- - |- - # Check if we have modifications to commit - CHANGES=$(git status --porcelain | wc -l) - - if [ "$CHANGES" -gt "0" ]; then - # Show the status of files that are about to be created, updated or deleted - git diff --cached --shortstat - - # Commit all changes - COMMIT_MESSAGE="CHANGELOG: release v$RELEASE_TAG_NAME" - echo "Commit message: ${COMMIT_MESSAGE}" - git commit -m "${COMMIT_MESSAGE}" - - # Create git tag - git tag "v$RELEASE_TAG_NAME" - - # Update the repository and make sure to skip the pipeline create for this commit - git push origin "${CI_DEFAULT_BRANCH}" - # Push new tags and trigger the pipeline since we're deploying - git push --tags origin "${CI_DEFAULT_BRANCH}" - - # Also release on the master branch - git checkout master - git rebase "${CI_DEFAULT_BRANCH}" - git push origin master - fi - when: manual - -# Generate R2RML test report and send MR to rml.io website -# Push options: https://docs.gitlab.com/ee/user/project/push_options.html +# Generate R2RML test report and attach it as an artifact. 
+# Manual: make a MR to the rml.io website R2RML Test Report: stage: deploy - image: docker:latest + image: + name: docker:29 + pull_policy: if-not-present services: - - docker:19.03.12-dind + - docker:29-dind before_script: # Dependencies - apk add git python3 python3-dev py3-pip postgresql-dev postgresql libpq gcc musl-dev docker docker-compose java-jre-headless maven java-jdk # Clone the repository via HTTPS inside a new directory - - git clone "https://${GITLAB_USERNAME}:${GITLAB_TOKEN_WEBSITE}@${CI_SERVER_HOST}/rml/doc/rmlio-website.git" - git clone "https://github.com/kg-construct/r2rml-test-cases-support" # Set the displayed user with the commits that are about to be made @@ -218,8 +102,9 @@ R2RML Test Report: - docker info script: # Variables - - RELEASE_TAG_NAME=$(git tag -l "v*" --sort=-creatordate | head -n1) - - RELEASE_TAG_NAME=$(echo "$RELEASE_TAG_NAME" | cut -c2-) + - R2RML_TAG_NAME=$(git tag -l "v*" --sort=-creatordate | head -n1 || echo "$CI_COMMIT_REF_NAME") + - (if [ "$R2RML_TAG_NAME" == "$CI_COMMIT_REF_NAME" ]; then echo "$CI_COMMIT_REF_NAME"; else R2RML_TAG_NAME=$(echo "$R2RML_TAG_NAME" | cut -c2-); fi) + - R2RML_TAG_NAME=$(echo "$R2RML_TAG_NAME" | tr -d '\n') - TODAY=$(date +"%Y-%m-%d") # Build RMLMapper jar @@ -229,51 +114,26 @@ R2RML Test Report: - cp data/r2rml-test-cases-config-postgresql.ini r2rml-test-cases-support/config-postgresql.ini - cp data/r2rml-test-cases-config-mysql.ini r2rml-test-cases-support/config-mysql.ini - cd r2rml-test-cases-support - - echo "Generating test report for release v${RELEASE_TAG_NAME} on ${TODAY}" - - sed -i "s/VERSION/${RELEASE_TAG_NAME}/" config-postgresql.ini - - sed -i "s/VERSION/${RELEASE_TAG_NAME}/" config-mysql.ini + - echo "Generating test report for release v${R2RML_TAG_NAME} on ${TODAY}" + - sed -i "s/VERSION/${R2RML_TAG_NAME}/" config-postgresql.ini + - sed -i "s/VERSION/${R2RML_TAG_NAME}/" config-mysql.ini - sed -i "s/DATE/${TODAY}/" config-postgresql.ini - sed -i "s/DATE/${TODAY}/" 
config-mysql.ini # Install R2RML test cases dependencies - - python3 -m pip install -r requirements.txt + - python3 -m pip install requests 'rdflib==6.0.2' 'psycopg2-binary<3' 'mysql-connector-python<9' --break-system-packages # Execute R2RML test cases - HOST=docker python3 test.py config-postgresql.ini - HOST=$(getent hosts docker | cut -f1 -d " ") python3 test.py config-mysql.ini # MySQL wants an IP address - # Prepare MR for rml.io website - - cp results.ttl ../rmlio-website/tools/rmlmapper/r2rml-test-cases-results.ttl - cd .. - after_script: - - RELEASE_TAG_NAME=$(git tag -l "v*" --sort=-creatordate | head -n1) - - RELEASE_TAG_NAME=$(echo "$RELEASE_TAG_NAME" | cut -c2-) - - # Go to the new directory - - cd rmlio-website - - # Add all generated files to Git - - git add . - - |- - # Check if we have modifications to commit - CHANGES=$(git status --porcelain | wc -l) - BRANCH="rmlmapper/${RELEASE_TAG_NAME}" - - if [ "$CHANGES" -gt "0" ]; then - # Show the status of files that are about to be created, updated or deleted - git diff --cached --shortstat - - # Create new branch - git checkout -b "${BRANCH}" - - # Commit all changes - COMMIT_MESSAGE="tools/rmlmapper: update R2RML test report for RMLMapper ${RELEASE_TAG_NAME}" - echo "Commit message: ${COMMIT_MESSAGE}" - git commit -m "${COMMIT_MESSAGE}" - - # Update the repository and make sure to skip the pipeline create for this commit - echo "Creating Merge Request of branch ${BRANCH}" - git push origin "${BRANCH}" -o merge_request.create -o merge_request.remove_source_branch -o merge_request.title="${COMMIT_MESSAGE}" - fi - only: - - tags + # Run during merge requests and tags + except: + - development + - master + artifacts: + when: always + paths: + - $CI_PROJECT_DIR/r2rml-test-cases-support/results.ttl + - $CI_PROJECT_DIR/r2rml-test-cases-support/config-*.ini diff --git a/.m2/settings.xml b/.m2/settings.xml index f01b50ab..48953beb 100644 --- a/.m2/settings.xml +++ b/.m2/settings.xml @@ -6,10 +6,5 @@ 
xmlns="http://maven.apache.org/SETTINGS/1.1.0" xmlns:xsi="http://www.w3.org/2001 ${env.MAVEN_REPO_USER} ${env.MAVEN_REPO_PASS} - - ossrh - ${env.MAVEN_REPO_USER} - ${env.MAVEN_REPO_PASS} - diff --git a/CHANGELOG.md b/CHANGELOG.md index 17527c75..148a5c8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,385 @@ All notable changes to this project will be documented in this file. -The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.1.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## Unreleased +## [8.1.0] - 2025-12-23 + +### Added +- `crossConcat` and `crossConcatSequence` functions (by updating dependency to idlab-functions-java) + +### Changed +- Requires Java version >= 21 + +### Fixed +- Updated dependency function-agent-java to 1.3.0 +- Updated idlab-functions-java to 1.4.0 +- Publish to Maven Central using central-publishing-maven-plugin + +## [8.0.1] - 2025-12-11 + +### Fixed +- Dependency updates to address some vulnerabilities: +- DataIO to 2.5.2 +- Jena to 5.2.0 +- commons-lang3 to 3.18.0 +- json-smart to 2.5.2 +- Added dependency to commons-cli (a transitive dependency which got lost) +- Update DataIO to 2.2.0 (to address https://github.com/RMLio/rmlmapper-java/issues/266) +- Update testcontainers-* dependencies to 2.0.2 +- GitLab CI: update GitLab CI Docker (services) to version 29 +- Update grel-functions-java to v0.10.1 to fix bug in quotient function + +## [8.0.0] - 2025-09-18 + +### Fixed +- A join between triples maps with different logical targets might result in quads ending up in wrong targets. +- rmle:contentTypeHeader and rmle:acceptHeader is replaced by generic htv:headers for Http Request Targets. +- Comparing htv:headers might go wrong due to String comparison. +- Conversion to new RML goes wrong when an object map has term type Literal. + +### Added +- Option to serialize output as Jelly. 
+ +### Changed +- Updated new RML test cases for IO and Core. +- Updated dependency of DataIO to 2.1.2 + +### Removed +- `QuadStore`: removed deprecated `write` methods. + +## [7.3.3] - 2025-04-07 + +### Fixed +- Better handle unsupported reference formulation (See [issue 247](https://github.com/RMLio/rmlmapper-java/issues/247)) +- Graph Maps with default graph must not be ignored (See [issue 252](https://github.com/RMLio/rmlmapper-java/issues/252)) +- Honor datatypes for constants (See [issue 251](https://github.com/RMLio/rmlmapper-java/issues/251)) +- Honor language tags for constants (See [issue 251](https://github.com/RMLio/rmlmapper-java/issues/251)) + +## [7.3.2] - 2025-03-04 + +### Fixed +- Updated Function Agent to 1.2.1 because of a bugfix (See [this issue in Gitlab](https://gitlab.ilabt.imec.be/KNoWS/fno/proc/function-component/-/issues/23), fixes [#249](https://github.com/RMLio/rmlmapper-java/issues/249) on GitHub). + +## [7.3.1] - 2025-01-22 + +### Fixed +- Upgraded dataio to 2.0.1 incorporating dataio security fixes. +- Upgraded logback-core to 1.5.16 to fix 2 security CVEs. + +## [7.3.0] - 2025-01-22 + +### Changed +- Some tests are changed because record error handling has changed. + +### Fixed +- Optimization for authentication with CSS Client Credentials +- Update URI of prefix 'rmle:' to persistent w3id URI: 'https://w3id.org/imec/rml/ns/extensions#' +- Added all known content types for newline delimited JSON +- Fixed the return type of GREL array slice function in function description. +- Updated all Jena library dependencies to version 5.0.0 +- Added explicit dependencies on commons-io:2.18.0, commons-compress:1.27.1 and commons-lang3:3.17.0 to avoid runtime errors. +- Updated rdf4j-client to 5.1.0 +- Updated wiremock to 3.9.1 (3.10.0 contains bugs in combination with saxon). 
+- Updated grel-functions-java to v0.10.0 +- Updated dataio to 2.0.0, *the* reason for most changes ;) +- Updated testcontainers to 1.18.3 +- Inconsistencies in test case RMLTC0007h (mapping + all formats) and RMLTC0009a (JSON) + +## [7.2.0] - 2024-12-09 + +### Added +- CLI option `--convert-mapping` to convert your existing R2RML and older RML mappings +to the latest version by the W3C Community Group. +- HTTP request targets +- dynamic logical targets + +### Fixed +- GREL array functions handling. + +## [7.1.2] - 2024-09-19 + +### Fixed +- `release.sh` now also updates `pom.xml` with the given tag. + +## [7.1.1] - 2024-09-19 + +### Fixed +- Updated idlab-functions-java to 1.3.3 to use correct state path in stateful functions when `ifState` environment variable is set. + +### Changed +- CI: use local script for release + +## [7.1.0] - 2024-08-13 + +### Added +- Serialization format TriG is now supported for Logical Targets (fixes GitHub [issue 225](https://github.com/RMLio/rmlmapper-java/issues/225)). + +### Fixed +- Crash when null is returned in an XPath function for subject generation (fixes GitHub [issue 236](https://github.com/RMLio/rmlmapper-java/issues/236)) +- Update to DataIO 1.3.1: `DATAIO_NULL` value occurring in RDF output when CSV quoted string has newline character in it (fixes GitHub [issue 238](https://github.com/RMLio/rmlmapper-java/issues/238)) +- RML Test report script: remove newlines from R2RML tag name + +## [7.0.0] - 2024-06-07 + +### Fixed +- Gitlab CI R2RML test cases compliance +- Gitlab CI Maven Central retrying on failure due to Maven Central server problems +- Gitlab CI Docker Hub deployment fixed +- Update DataIO dependency to 1.1.0 (fixes GitLab [issue 274](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/274)) +- R2RML Test Report CI script failed in development due to missing pip parameter. 
+- Optimization for self-joins without join conditions (fixes Gitlab [issue 275](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/275)) +- Optimization for self joins with join condition. +- Updated junit to 5.10.0 +- Updated jena to 4.9.0 +- Updated testcontainers to 1.18.3 +- Added explicit dependency on json-smart, since it was an indirect dependency which changed scope +- Updated idlab-functions-java to 1.3.2, fixing an issue with IncRML +- Updated dataio to 1.2.0 +- Changed scope of `json-path` dependency to `runtime` + +### Changed +- Changed basepath from null to http://example.com/ (see [issue 263](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/263)) +- gitlab CI pull policy + +### Added +- Translation to new RML. Only for Core. + +## [6.5.1] - 2023-12-06 + +### Fixed +- Upgrade logback-classic to 1.4.14. +- Upgrade wiremock-jre8 to 2.35.1. + +## [6.5.0] - 2023-12-06 + +### Changed +- Update dependency on `idlab-functions-java` to 1.3.1. This implies three things: + - The namespace for IDLab functions FnO descriptions changed, so RMLMapper loads the old and the new one to remain compatible; + - All tests in RMLMapper use the new namespace; + - Stateful functions use a more compact and efficient state. +- Update dependency on `function-agent-java` to 1.2.0, the first version released on Maven Central. +- Upgraded rdf4j-client dependency to version 4.3.8 +- Compile regex patterns for RDF4J store only once for performance. + +### Fixed +- Setting option `disable-automatic-eof-marker` in a configuration file has no effect (see GitLab [issue #269](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/269)). +- Start adhering to [Keep a Changelog](http://keepachangelog.com/en/1.1.0/) from now on (`Improvements` -> `Changed`). 
+- Compatibility issue with old IDLab functions FnO descriptions (see GitLab [issue #270](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/270)) + +### Added +- Test case for working with CSV logical sources without header. + +## [6.4.0] - 2023-11-14 + +### Added +- Test handling Windows CRLF line ending for CSV files ([issue #201](https://github.com/RMLio/rmlmapper-java/issues/201)) +- Added test for empty column handling in csv logical source files ([issue #159](https://github.com/RMLio/rmlmapper-java/issues/159)) + +### Improvements +- Tests: let Java handle temporary files. +- pom: upgrade maven-surefire-plugin to 3.2.2 +- pom: upgrade nexus-staging-maven-plugin to 1.6.13 +- pom: upgrade json-path to 2.8.0 +- pom: upgrade logback-classic to 1.4.11 +- resources: rml-ldes: avoid symlinks +- pom: upgrade RDB connectors (postgresql 42.6.0, mssql-jdbc 12.4.2.jre11, ojdbc11 23.3.0.23.09, mysql-connector-j 8.2.0) + +### Changed +- Use the DataIO library to handle access to files. +- Requires Java 17 language level. +- Refactoring to use interfaces where possible (e.g. `ArrayList` -> `List`). + +### Fixed +- Test cases: add array initializers to avoid bugs. +- Use correct prefix for `EventStreamTarget`s in tests. +- Refactoring of Executor: code deduplication +- Document `--disable-automatic-eof-marker` option. + +## [6.3.0] - 2023-11-14 + +### Improvements +- Add support for detecting additions, modifications, and deletions in a Knowledge Graph with FnO functions. +- Verify LDES EventStreamTarget output for additions, modifications, and deletions. +- Added RML-LDES test-cases. +- Added test for mapping with CSV file where column does not match header + +### Changed +- Build Docker image in two stages, reducing the final image size. +- LDES EventStreamTarget properties are now all optional. +- Removed usage of legacy RDF model, using RDF4J instead. + +### Fixed +- Don't close output streams that don't need closing. 
+- Add url decoders to file paths so special characters and spaces work. +- Test cases: add array initializers to avoid bugs. + +## [6.2.2] - 2023-10-05 + +### Fixed +- Fixed self-joins with join conditions ([internal issue #199](https://github.com/RMLio/rmlmapper-java/issues/199)) +- Upgrade rdfhdt to 3.0.10 ([issue #215](https://github.com/RMLio/rmlmapper-java/issues/215)) +- Improve docs on deduplication ([issue #214](https://github.com/RMLio/rmlmapper-java/issues/214)) +- Handle plain old Java Arrays as well for FnO return values +- Update dependency to `idlab-functions-java`, fixing [issue #218](https://github.com/RMLio/rmlmapper-java/issues/218) + +### Changed +- Dockerfile: switched to Eclipse OpenJDK Docker base image because OpenJDK is deprecated +- CI: dropped GitHub release automation +- CI: verify if token is valid + +### Improvements +- Heavily increased performance of RMLMapper through several optimizations + +## [6.2.1] - 2023-07-06 + +### Fixed +- Updated idlab-functions-java to v1.0.0 +- CSVW 1025 testcase now reads correct logical source + +## [6.2.0] - 2023-06-22 + +### Fixed +- Updated function-agent-java to v1.1.0 +- Updated grel-functions-java to v0.9.1 +- Updated idlab-functions-java to v0.3.1 +- Updated junit to 5.9.1 +- Updated testcontainers to 1.17.6 +- Updated logback to 1.4.5 +- Updated Saxon-HE to 11.4 +- Updated postgresql to 42.5.1 +- Detect non existing files early ([issue 223](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/233)) +- Subject of LDES event stream has wrong IRI (internal GitLab issue [#253](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/253)) +- MS SQLServer JDBC driver was only for tests on classpath (internal GitLab issue [#255](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/255)) +- Normalising Double numbers from RDBs went wrong (internal GitLab issue [#256](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/256), GitHub issue 
[#206](https://github.com/RMLio/rmlmapper-java/issues/206)) + +### Changed +- `tree:view` and `ldes:retentionPolicy` are omitted when generating LDES metadata. + +### Added +- A new Maven profile `no-buildnumber` disables using and updating `buildNumber.properties`. +- Mocked DBpedia Spotlight service ([issue #250](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/250)) +- Update [dynamic-loading](https://github.com/RMLio/rmlmapper-java#dynamic-loading) in README.md + +## [6.1.3] - 2022-12-20 + +### Fixed +- Reraise exceptions from executor, so that CLI exits with non-zero exit code in the event of errors in execution ([issue 194](https://github.com/RMLio/rmlmapper-java/issues/194)) + +## [6.1.2] - 2022-11-22 + +## [6.1.1] - 2022-11-21 + +## [6.1.0] - 2022-11-21 + +### Changed +- Require Java 11+ +- Port all tests to Junit 5 +- Database tests are executed with a fresh instance of the required database in a Docker container +- Update of function libraries IDLab functions, GREL functions, Function Agent +- Removed need for (deprecated) SecurityManager + +### Fixed +- Dropped dependency on Guava +- Dropped explicit dependency on Jetty +- Dropped dependency on mariaDB4j +- `RDF4JStore` had wrong regex to check for Literals when they have a language tag or data type. 
+- Updated RDF4J to 4.2.1 +- Updated Jena to 4.6.1 +- Updated indirect dependencies to Xerces 2.12.2 +- Updated mysql-connector-java to 8.0.31 +- Updated postgresql to 42.5.0 +- Updated mssql-jdbc to 11.2.1.jre11 +- Updated com.fasterxml.jackson.core.* dependencies to 2.14.0 +- Updated jsoup to 1.15.3 +- Updated opencsv to 5.7.1 +- Updated poi-ooxml to 5.2.3 +- Updated testcontainers to 1.17.5 +- Updated mybatis to 3.5.11 +- Updated ojdbc8 21.6.0.0.1 to ojdbc11 21.7.0.0 +- Force Wiremock to use commons-fileupload 1.4 +- Main: Output paths can again be fully relative +- Dropped dependency on com.spotify.docker-client ([issue 231](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/231)) +- Running multiple pipelines should no longer interfere with each other ([issue 245](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/245)) +- Fixed TriplesMaps detection of MappingConformer [issue 251](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/251) +- `NamedNodeGenerator` now checks if the given IRI is valid ([issue 249](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/249)) + +### Added +- pom.xml: Added Testcontainers library dependencies for databases we test on +- pom.xml: Added JUnit5 dependencies +- Check for changelog changes in a separate lint stage during CI. 
+ +## [6.0.0] - 2022-07-04 + +### Changed +- Run all tests in parallel on CI infrastructure +- Upgrade Function Agent to v0.1.0 + +### Fixed +- Upgrade postgresql JDBC driver to 42.3.3 +- Rename `logback.xml` to `logback-test.xml` in test resources (see [issue 240](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/240)) +- If exception occurs while mapping data, already processed data gets written to output file +- Properly close resources such as input streams +- Github release notes are now properly extracted +- Discard UTF Byte-Order-Marks (BOM) (see [issue 171](https://github.com/RMLio/rmlmapper-java/issues/171)) +- Dropped lib directory for functions as we now use a separate FnO Function Agent + +### Added +- IDLabFunctions: added functions lookup and lookupWithDelimiter +- IDLabFunctionsTest: added unit tests for lookup functions +- IDLabFunctionsTest: relocate csv files used by tests for lookup function +- IDLabFunctions: silence stack trace +- CSVW: filter out rows with a comment prefix +- Output file path now gets checked before the mapping +- .gitignore: ignore vscode files +- pom.xml: add Saxon-HE v11.3 dependency +- XMLRecord: adjusted to use Saxon +- XMLRecordFactory: adjusted to use Saxon +- SaxNamespaceResolver: added class for resolving namespaces in Saxon. This fixes [#154](https://github.com/RMLio/rmlmapper-java/issues/154). 
+- Added unit tests for XPath 2/3 expressions and functions +- Loggers across the codebase have been updated to use parameterized strings +- Add support for WoT OAuth2 Security Scheme (see [issue 212](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/212)) +- Integration of independent function handler + +## [5.0.0] - 2022-03-11 +- TestCore: enable debug logs when VERBOSE env variable is set (see [issue 230](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/230)) +- Switched to fork of ODFtoolkit (see [issue 237](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/237)) + +### Added + +- Add support for LDES Logical Target +- Add support for generating unique reproducible IRIs for LDES + +### Changed + +- Write LDES state to disk when mapping execution is complete. +- Removed deprecated execute() and executeWithFunction() methods, and refactored the executeV5() and executeWithFunctionV5() methods to execute() and executeWithFunction(). + +### Fixed + +- Clarified Readme for quick start +- No cartesian product when referring to the same logical source (see [issue 28](https://github.com/RMLio/rmlmapper-java/issues/28)) + - If you still want the cartesian product, update your mappings to refer to different logical sources with the same contents +- Upgraded jetty-server and jetty-security to 9.4.44.v20210927 +- Upgraded wiremock-jre8 to 2.32.0 +- Upgraded ch.qos.logback to 1.2.10 +- Upgraded commons-cli to 1.5.0 +- Upgraded com.jayway.jsonpath to 2.7.0 +- Upgraded ch.vorburger.mariaDB4j to 2.5.3 +- Upgraded com.microsoft.sqlserver to 10.2.0.jre8 +- Upgraded com.fasterxml.jackson.core to 2.13.1 +- Upgraded org.jsoup to 1.14.3 +- Upgraded org.apache.poi to 5.0.0 +- Resources: functions_grel: use xsd:integer (see [issue 234](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/234)) +- Print error if referenceformulation is unsupported + +### Changed + +- Upgrade postgresql JDBC driver to 42.3.2 (see [issue 
146](https://github.com/RMLio/rmlmapper-java/issues/146)) + ## [4.15.0] - 2022-02-01 ### Fixed @@ -472,6 +846,30 @@ and [169](https://gitlab.ilabt.imec.be/rml/proc/rmlmapper-java/-/issues/169)) - support for accessing remote files (via HTTP GET) - basic support for functions +[8.1.0]: https://github.com/RMLio/rmlmapper-java/compare/v8.0.1...v8.1.0 +[8.0.1]: https://github.com/RMLio/rmlmapper-java/compare/v8.0.0...v8.0.1 +[8.0.0]: https://github.com/RMLio/rmlmapper-java/compare/v7.3.3...v8.0.0 +[7.3.3]: https://github.com/RMLio/rmlmapper-java/compare/v7.3.2...v7.3.3 +[7.3.2]: https://github.com/RMLio/rmlmapper-java/compare/v7.3.1...v7.3.2 +[7.3.1]: https://github.com/RMLio/rmlmapper-java/compare/v7.3.0...v7.3.1 +[7.3.0]: https://github.com/RMLio/rmlmapper-java/compare/v7.2.0...v7.3.0 +[7.2.0]: https://github.com/RMLio/rmlmapper-java/compare/v7.1.2...v7.2.0 +[7.1.2]: https://github.com/RMLio/rmlmapper-java/compare/v7.1.1...v7.1.2 +[7.1.1]: https://github.com/RMLio/rmlmapper-java/compare/v7.1.0...v7.1.1 +[7.1.0]: https://github.com/RMLio/rmlmapper-java/compare/v7.0.0...v7.1.0 +[7.0.0]: https://github.com/RMLio/rmlmapper-java/compare/v6.5.1...v7.0.0 +[6.5.1]: https://github.com/RMLio/rmlmapper-java/compare/v6.5.0...v6.5.1 +[6.5.0]: https://github.com/RMLio/rmlmapper-java/compare/v6.3.0...v6.5.0 +[6.3.0]: https://github.com/RMLio/rmlmapper-java/compare/v6.2.2...v6.3.0 +[6.2.2]: https://github.com/RMLio/rmlmapper-java/compare/v6.2.1...v6.2.2 +[6.2.1]: https://github.com/RMLio/rmlmapper-java/compare/v6.2.0...v6.2.1 +[6.2.0]: https://github.com/RMLio/rmlmapper-java/compare/v6.1.3...v6.2.0 +[6.1.3]: https://github.com/RMLio/rmlmapper-java/compare/v6.1.2...v6.1.3 +[6.1.2]: https://github.com/RMLio/rmlmapper-java/compare/v6.1.1...v6.1.2 +[6.1.1]: https://github.com/RMLio/rmlmapper-java/compare/v6.1.0...v6.1.1 +[6.1.0]: https://github.com/RMLio/rmlmapper-java/compare/v6.0.0...v6.1.0 +[6.0.0]: https://github.com/RMLio/rmlmapper-java/compare/v5.0.0...v6.0.0 +[5.0.0]: 
https://github.com/RMLio/rmlmapper-java/compare/v4.15.0...v5.0.0 [4.15.0]: https://github.com/RMLio/rmlmapper-java/compare/v4.14.3...v4.15.0 [4.14.3]: https://github.com/RMLio/rmlmapper-java/compare/v4.14.2...v4.14.3 [4.14.2]: https://github.com/RMLio/rmlmapper-java/compare/v4.14.1...v4.14.2 diff --git a/CI.md b/CI.md deleted file mode 100644 index 784c481c..00000000 --- a/CI.md +++ /dev/null @@ -1,6 +0,0 @@ -# Gitlab CI setup - -Add the following CI variables: - -- `DOCKER_HUB_USER`: hub.docker.com username -- `DOCKER_HUB_PASSWORD`: hub.docker.com password diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..11df63e6 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,23 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Heyvaert" + given-names: "Pieter" + orcid: "https://orcid.org/0000-0002-1583-5719" +- family-names: "Van Assche" + given-names: "Dylan" + orcid: "https://orcid.org/0000-0002-7195-9935" +- family-names: "De Meester" + given-names: "Ben" + orcid: "https://orcid.org/0000-0003-0248-0987" +- family-names: "Haesendonck" + given-names: "Gerald" + orcid: "https://orcid.org/0000-0003-1605-3855" +- family-names: "de Vleeschauwer" + given-names: "Els" + orcid: "https://orcid.org/0000-0002-8630-3947" +- given-names: "Sitt Min Oo" + orcid: "https://orcid.org/0000-0001-9157-7507" +title: "RMLMapper-JAVA" +doi: 10.5281/zenodo.3929132 +url: "https://github.com/RMLio/rmlmapper-java" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 78e95c7b..f58a25be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,16 @@ -FROM openjdk:8-alpine +# Build image +FROM maven:3.9.12-eclipse-temurin-21-alpine AS buildimage -RUN apk add --no-cache git maven ADD . 
/rmlmapper-java -WORKDIR rmlmapper-java -RUN mvn clean install -DskipTests=true -RUN mv `find target/ -iname rmlmapper-*-all.jar;` /rmlmapper.jar +WORKDIR /rmlmapper-java +RUN mvn -Pno-buildnumber clean package -DskipTests=true +RUN mv $(readlink -f target/rmlmapper-*-all.jar) /rmlmapper.jar + +# "Runtime" image +FROM eclipse-temurin:25-jre +COPY --from=buildimage /rmlmapper.jar /rmlmapper.jar + WORKDIR /data ENTRYPOINT ["java", "-jar", "/rmlmapper.jar"] diff --git a/README.md b/README.md index 52484def..74218ff9 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,13 @@ [![Maven Central](https://img.shields.io/maven-central/v/be.ugent.rml/rmlmapper.svg?label=Maven%20Central)](https://search.maven.org/search?q=g:%22be.ugent.rml%22%20AND%20a:%22rmlmapper%22) -The RMLMapper execute RML rules to generate Linked Data. +The RMLMapper executes RML rules to generate Linked Data. It is a Java library, which is available via the command line ([API docs online](https://javadoc.io/doc/be.ugent.rml/rmlmapper)). The RMLMapper loads all data in memory, so be aware when working with big datasets. +Want to get started quickly? Check out [Releases](#releases) on where to find the latest CLI build as a jar, +and see [Usage](#cli) on how to use the commandline interface! 
+ ## Table of contents - [Features](#features) @@ -20,70 +23,107 @@ The RMLMapper loads all data in memory, so be aware when working with big datase - [Including functions](#including-functions) - [Generating metadata](#generating-metadata) - [Testing](#testing) + - [Command line](#command-line) + - [IntelliJ](#intellij) - [RDBs](#rdbs) -- [Deploy on Central Repository](#deploy-on-central-repository) - [Dependencies](#dependencies) - [Commercial Support](#commercial-support) - [Remarks](#remarks) + - [Typed spreadsheet files](#typed-spreadsheet-files) - [XML file parsing performance](#xml-file-parsing-performance) - [Language tag support](#language-tag-support) - [Duplicate removal and serialization format](#duplicate-removal-and-serialization-format) + - [I have a question! Where can I get help?](#i-have-a-question-where-can-i-get-help) - [Documentation](#documentation) - [UML Diagrams](#uml-diagrams) ## Features ### Supported + - local data sources: - Excel (.xlsx) - LibreOffice (.ods) - - CSV files (including CSVW) - - JSON files (JSONPath) + - CSV files + - including CSVW, for an overview of which parts of CSVW are supported, have a look at the [test cases, introduced in v4.4.0](https://github.com/RMLio/rmlmapper-java/tree/master/src/test/resources/test-cases-CSVW) + - JSON files (JSONPath (`@` can be used to select the current object.)) - XML files (XPath) - remote data sources: - relational databases (MySQL, PostgreSQL, Oracle, and SQLServer) - - Web APIs with W3C Web of Things + - Web APIs with W3C Web of Things, see [an exemplary test case, introduced in v4.13.0](https://github.com/RMLio/rmlmapper-java/blob/master/src/test/resources/web-of-things/logical-target/sparql/mapping.ttl) - SPARQL endpoints - files via HTTP urls (via GET) - - CSV files + - CSV files, see [an exemplary test case, introduced in v0.1.0](https://github.com/RMLio/rmlmapper-java/blob/master/src/test/resources/test-cases/RMLTC1003-CSV/mapping.ttl) - JSON files (JSONPath (`@` can be used 
to select the current object.)) - XML files (XPath) - functions (most cases) - For examples on how to use functions within RML mapping documents, you can have a look at the [RML+FnO test cases](https://github.com/RMLio/rml-fno-test-cases) + - Notable function examples include + - SHA1 and MD5 functions, see [this comment on issue 100](https://github.com/RMLio/rmlmapper-java/issues/100#issuecomment-826547387) + - Lookup function, see [this comment on issue 220](https://github.com/RMLio/rmlmapper-java/issues/220#issuecomment-1754422677) - configuration file - metadata generation -- output formats: nquads (default), turtle, trig, trix, jsonld, hdt - join conditions +- output formats: nquads (default), turtle, trig, trix, jsonld, hdt, [jelly](https://w3id.org/jelly) - targets: - local file - VoID dataset - SPARQL endpoint with SPARQL UPDATE + - [HTTP request access](https://rml.io/specs/access/httprequest/) + - [dynamic logical targets](https://rml.io/specs/target/dynamictarget/) + +All functionalities above refer to using RML as maintained at . +There is _some_ support for RML as developed within W3C's [Knowledge Graph Construction Community Group](https://www.w3.org/community/kg-construct/), for a recent view, see the [test cases, support introduced in v7.0.0](https://github.com/RMLio/rmlmapper-java/tree/master/src/test/resources/new-test-cases). ### Future + - functions (all cases) - conditions (all cases) +- logical views + - Currently, this can be mitigated: we maintain a preprocessor to support this: [RML-view-to-CSV](https://github.com/RMLio/rml-view-to-csv). - data sources: - NoSQL databases - TPF servers + - [HTTP request access](https://rml.io/specs/access/httprequest/) + - dynamic logical source + - Currently, this can be mitigated: RMLMapper allows to use the commandline interface to combine multiple mapping rules, so you could script it, for more info, see [issue 161](https://github.com/RMLio/rmlmapper-java/issues/161).
## Releases -The standalone jar file for every release can be found on the release's page on GitHub. -You can find the latest release [here](https://github.com/RMLio/rmlmapper-java/releases/latest). + +The standalone jar file (that has a [commandline interface](#cli)) for every release can be found on the release's page on GitHub. +You can find the latest release on [the dedicated RMLMapper-JAVA Github release page](https://github.com/RMLio/rmlmapper-java/releases/latest). +This is the recommended way to get started with RMLMapper. +Do you want to build from source yourself? Check [Build](#build). ## Build -The RMLMapper is build using Maven: `mvn install`. + +The RMLMapper is built using Maven. +As it is also tested against Oracle (check [the RDB documentation below](#rdbs) for details), +it needs a specific set-up to run all tests. +That's why we recommend to build without testing: `mvn install -DskipTests=true`. +If you want, you can install with tests, and just skip the Oracle tests: `mvn test -Dtest=!Mapper_OracleDB_Test`. + A standalone jar can be found in `/target`. Two jars are found in `/target`: a slim jar without bundled dependencies, and a standalone jar (suffixed with `-all.jar`) with all dependencies bundled. +Building with profile `no-buildnumber` disables using and updating `buildNumber.properties` (and uses `0` as build number), e.g.: + +```bash +mvn clean package -P no-buildnumber +``` + +outputs for example `target/rmlmapper--r0.jar` + ## Usage ### CLI + The following options are most common. - `-m, --mapping `: one or more mapping file paths and/or strings (multiple values are concatenated). - `-o, --output `: path to output file -- `-s,--serialization `: serialization format (nquads (default), trig, trix, jsonld, hdt) +- `-s,--serialization `: serialization format (nquads (default), trig, trix, jsonld, hdt, [jelly](https://w3id.org/jelly)) All options can be found when executing `java -jar rmlmapper.jar --help`, that output is found below. 
@@ -91,78 +131,66 @@ that output is found below. ``` usage: java -jar mapper.jar options: + -b,--base-iri Base IRI used to expand relative IRIs + in generated terms in the output. -c,--configfile path to configuration file - -d,--duplicates remove duplicates in the output + --convert-mapping Only convert the mapping to the + latest RML specification by the W3C + Community Group + -d,--duplicates remove duplicates in the HDT, + N-Triples, or N-Quads output + --disable-automatic-eof-marker Setting this option assumes input + data has a kind of End-of-File + marker. Don't use unless you're + absolutely sure what you're doing! -dsn,--r2rml-jdbcDSN DSN of the database when using R2RML rules -e,--metadatafile path to output metadata file - -f,--functionfile one or more function file paths (dynamic - functions with relative paths are found - relative to the cwd) + -f,--functionfile one or more function file paths + (dynamic functions with relative + paths are found relative to the cwd) -h,--help show help info - -l,--metadataDetailLevel generate metadata on given detail level - (dataset - triple - term) + -l,--metadataDetailLevel generate metadata on given detail + level (dataset - triple - term) -m,--mappingfile one or more mapping file paths and/or strings (multiple values are - concatenated). r2rml is converted to rml - if needed using the r2rml arguments. - -psd,--privatesecuritydata one or more private security files - containing all private security - information such as usernames, passwords, - certificates, etc. + concatenated). r2rml is converted to + rml if needed using the r2rml + arguments.RDF Format is determined + based on extension. 
-o,--outputfile path to output file (default: stdout) -p,--r2rml-password password of the database when using R2RML rules - -s,--serialization serialization format (nquads (default), - turtle, trig, trix, jsonld, hdt) - -t,--triplesmaps IRIs of the triplesmaps that should be - executed in order, split by ',' (default - is all triplesmaps) + -psd,--privatesecuritydata one or more private security files + containing all private security + information such as usernames, + passwords, certificates, etc. + -s,--serialization serialization format (nquads + (default), turtle, trig, trix, + jsonld, hdt, jelly) + --strict Enable strict mode. In strict mode, + the mapper will fail on invalid IRIs + instead of skipping them. + -t,--triplesmaps IRIs of the triplesmaps that should + be executed in order, split by ',' + (default is all triplesmaps) -u,--r2rml-username username of the database when using R2RML rules - -v,--verbose show more details in debugging output - --strict Enable strict mode. In strict mode, the - mapper will fail on invalid IRIs instead - of skipping them. - -b --base-IRI base IRI used to expand relative IRIs in - mapped terms. If not set and not in --strict - mode, will default to the @base directive - inside the provided mapping file. - + -v,--verbose show more details in debugging output ``` #### Accessing Web APIs with authentication The [W3C Web of Things Security Ontology](https://www.w3.org/2019/wot/security) -is used to describe how Web APIs authentication should be performed +is used to describe how Web APIs authentication should be performed but does not include the necessary credentials to access the Web API. These credentials can be supplied using the `-psd ` CLI argument. The `PATH` argument must point to one or more private security files which contain the necessary credentials to access the Web API. 
-An example can be found in the test cases +An example can be found in the test cases [src/test/resources/web-of-things](src/test/resources/web-of-things). -#### Accessing Oracle Database - -You need to add the Oracle JDBC driver manually to the class path -if you want to access an Oracle Database. -The required driver is `ojdbc8`. - -- Download `ojdbc8.jar` from [Oracle](https://www.oracle.com/database/technologies/jdbc-ucp-122-downloads.html). -- Execute the RMLMapper via - -``` -java -cp 'rmlmapper.jar:ojdbc8-12.2.0.1.jar' be.ugent.rml.cli.Main -m rules.rml.ttl -``` - -The options do the following: - -- `-cp 'rmlmapper.jar:ojdbc8-12.2.0.1.jar'`: Put the jar of the RMLMapper and JDBC driver in the classpath. -- `be.ugent.rml.cli.Main`: `be.ugent.rml.cli.Main` is the entry point of the RMLMapper. -- `-m rules.rml.ttl`: Use the RML rules in the file `rules.rml`.ttl. -The exact same options as the ones mentioned earlier are supported. - ### Library An example of how you can use the RMLMapper as an external library can be found @@ -187,13 +215,24 @@ The RMLMapper is executed in the `/data` folder in the Docker container. 
### Including functions -There are two ways to include (new) functions within the RML Mapper - * dynamic loading: you add links to java files or jar files, and those files are loaded dynamically at runtime - * preloading: you register functionality via code, and you need to rebuild the mapper to use that functionality +There are three ways to include (new) functions within the RML Mapper + +- dynamic loading: you add links to java files or jar files, and those files are loaded dynamically at runtime +- preloading: you register functionality via code, and you need to rebuild the mapper to use that functionality +- add as dependency Registration of functions is done using a Turtle file, which you can find in `src/main/resources/functions.ttl` -The snippet below for example links an fno:function to a library, provided by a jar-file (`GrelFunctions.jar`). +#### Dynamic loading + +Create a Turtle file that describes the functions that need to be included and add the jar which contains those functions. + +> Note: the java or jar-files are found relative to the cwd. +You can change the functions.ttl path (or use multiple functions.ttl paths) using a commandline-option (`-f`). + +For example the snippets below dynamically link an fno:Function to a library, provided by a jar-file (`CustomFunctions.jar`). The example links a function that parses the latitude (`50.2`) out of the following string `"POINT (50.2 5.3)"`. + + `functions.ttl` contains the description of the function in Turtle: ```turtle @prefix dcterms: . @@ -205,35 +244,42 @@ The snippet below for example links an fno:function to a library, provided by a @prefix grelm: . @prefix rdfs: . -grel:toUpperCase a fno:Function ; - fno:name "to Uppercase" ; - rdfs:label "to Uppercase" ; - dcterms:description "Returns the input with all letters in upper case." ; +grel:parsePointLat a fno:Function ; + fno:name "parsePointLat" ; + rdfs:label "parsePointLat" ; + dcterms:description "Parse the latitude from a point."
; fno:expects ( grel:valueParam ) ; fno:returns ( grel:stringOut ) . grelm:javaString a fnoi:JavaClass ; - doap:download-page "GrelFunctions.jar" ; - fnoi:class-name "io.fno.grel.StringFunctions" . + doap:download-page "CustomFunctions.jar" ; + fnoi:class-name "CustomFunctions" . -grelm:uppercaseMapping - a fnoi:Mapping ; - fno:function grel:toUpperCase ; +grelm:parsePointLat + a fno:Mapping ; + fno:function grel:parsePointLat ; fno:implementation grelm:javaString ; - fno:methodMapping [ a fnom:StringMethodMapping ; - fnom:method-name "toUppercase" ] . + fno:methodMapping [ a fnom:Function ; + fnom:method-name "parsePointLat" ] . ``` -#### Dynamic loading +The accompanying java file `CustomFunctions.java`: -Just put the java or jar-file in the resources folder, -at the root folder of the jar-location, -or the parent folder of the jar-location, -it will be found dynamically. +```java +public class CustomFunctions { + public static String parsePointLat(String s) { + return s.replace("POINT ", "").replace('(', ' ').replace(')', ' ').trim().split("\\s+")[0]; + } +} +``` -> Note: the java or jar-files are found relative to the cwd. -You can change the functions.ttl path (or use multiple functions.ttl paths) using a commandline-option (`-f`). +To dynamically include the custom function, compile the java-file and include `functions.ttl` with the `-f` option: + +```bash +javac CustomFunctions.java && jar cvf CustomFunctions.jar CustomFunctions.class +java -jar mapper.jar -f functions.ttl +``` #### Preloading @@ -241,6 +287,14 @@ This overrides the dynamic loading. An example of how you can use Preload a custom function can be found at [./src/test/java/be/ugent/rml/readme/ReadmeFunctionTest.java](https://github.com/RMLio/rmlmapper-java/blob/master/src/test/java/be/ugent/rml/readme/ReadmeFunctionTest.java) +#### Adding as dependency + +This is most interesting if you use RMLMapper as a library in your own project. 
+Just add the dependency to the function library you want to use in your project. + +You can also add a function library as a Maven dependency in `pom.xml` of RMLMapper. +You'll have to rebuild RMLMapper to use it. + ### Generating metadata Conform to how it is described in the scientific paper [1], @@ -257,55 +311,69 @@ and up to which level metadata should be stored (dataset, triple, or term level ## Testing +### Command line + Run the tests via `test.sh`. +### IntelliJ + +Right-click `src/test/java` directory and select "Run 'All tests'". + #### Derived tests + Some tests (Excel, ODS) are derived from other tests (CSV) using a script (`./generate_spreadsheet_test_cases.sh`) ### RDBs -Make sure you have [Docker](https://www.docker.com) running. + +Make sure you have [Docker](https://www.docker.com) running. On Unix, others read-write permission (006) is required on `/var/run/docker.sock` in order to run the tests. +The tests will fail otherwise, as Testcontainers can't spin up the container. #### Problems -* A problem with Docker (can't start the container) causes the SQLServer tests to fail locally. These tests will always succeed locally. -* A problem with Docker (can't start the container) causes the PostgreSQL tests to fail locally on Windows 7 machines. + +- A problem with Docker (can't start the container) causes the SQLServer tests to fail locally. These tests will always succeed locally. +- A problem with Docker (can't start the container) causes the PostgreSQL tests to fail locally on Windows 7 machines. 
## Dependencies -| Dependency | License | -|:---------------------------------------:|--------------------------------------------------------------------| -| ch.qos.logback logback-classic | Eclipse Public License 1.0 & GNU Lesser General Public License 2.1 | -| commons-cli commons-lang | Apache License 2.0 | -| com.opencsv opencsv | Apache License 2.0 | -| commons-cli commons-cli | Apache License 2.0 | -| org.eclipse.rdf4j rdf4j-runtime | Eclipse Public License 1.0 | -| junit junit | Eclipse Public License 1.0 | -| com.jayway.jsonpath json-path | Apache License 2.0 | -| javax.xml.parsers jaxp-api | Apache License 2.0 | -| org.jsoup | MIT | -| mysql mysql-connector-java | GNU General Public License v2.0 | -| ch.vorbuger.mariaDB4j mariaDB4j | Apache License 2.0 | -| postgresql postgresql | BSD | -| com.microsoft.sqlserver mssql-jdbc | MIT | -| com.spotify docker-client | Apache License 2.0 | -| com.fasterxml.jackson.core jackson-core | Apache License 2.0 | -| org.eclipse.jetty jetty-server | Eclipse Public License 1.0 & Apache License 2.0 | -| org.eclipse.jetty jetty-security | Eclipse Public License 1.0 & Apache License 2.0 | -| org.apache.jena apache-jena-libs | Apache License 2.0 | -| org.apache.jena jena-fuseki-embedded | Apache License 2.0 | -| com.github.bjdmeest hdt-java | GNU Lesser General Public License v3.0 | -| commons-validator commons-validator | Apache License 2.0 | -| com.github.fnoio grel-functions-java | MIT | +| Dependency | License | +|:--------------------------------------:|--------------------------------------------------------------------| +| ch.qos.logback logback-classic | Eclipse Public License 1.0 & GNU Lesser General Public License 2.1 | +| com.github.fnoio function-agent-java | MIT | +| com.github.fnoio grel-functions-java | MIT | +| com.github.fnoio idlab-functions-java | MIT | +| com.github.rdfhdt hdt-java | GNU Lesser General Public License v3.0 | +| com.github.tomakehurst:wiremock-jre8 | Apache License 2.0 | +| com.google.protobuf 
protobuf-java | BSD 3-clause | +| com.microsoft.sqlserver mssql-jdbc | MIT | +| com.mysql mysql-connector-java | GNU General Public License v2.0 | +| com.oracle.database.jdbc:ojdbc11 | Oracle Free Use Terms and Conditions | +| eu.neverblink.jelly jelly-core | Apache License 2.0 | +| eu.neverblink.jelly jelly-rdf4j | Apache License 2.0 | +| net.minidev json-smart | Apache License 2.0 | +| org.apache.jena fuseki-main | Apache License 2.0 | +| org.eclipse.rdf4j rdf4j-client | Eclipse Distribution License v1.0 | +| org.junit.jupiter junit-jupiter-api | Eclipse Public License v2.0 | +| org.junit.jupiter junit-jupiter-engine | Eclipse Public License v2.0 | +| org.junit.jupiter junit-jupiter-params | Eclipse Public License v2.0 | +| org.junit.vintage junit-vintage-engine | Eclipse Public License v2.0 | +| org.postgresql postgresql | BSD | +| org.testcontainers jdbc | MIT | +| org.testcontainers junit-jupiter | MIT | +| org.testcontainers mssqlserver | MIT | +| org.testcontainers mysql | MIT | +| org.testcontainers oracle-xe | MIT | +| org.testcontainers postgresql | MIT | ## Commercial Support Do you need... -- training? -- specific features? -- different integrations? -- bugfixes, on _your_ timeline? -- custom code, built by experts? -- commercial support and licensing? +- training? +- specific features? +- different integrations? +- bugfixes, on _your_ timeline? +- custom code, built by experts? +- commercial support and licensing? You're welcome to [contact us](mailto:info@rml.io) regarding on-premise, enterprise, and internal installations, integrations, and deployments. @@ -317,6 +385,7 @@ We also offer consulting for all-things-RML. ## Remarks ### Typed spreadsheet files + All spreadsheet files are as of yet regarded as plain CSV files. No type information like Currency, Date... is used. 
### XML file parsing performance @@ -334,38 +403,44 @@ The regex has no support for languages of length 5-8, but this currently only ap Performance depends on the serialization format (`--serialization `) and if duplicate removal is enabled (`--duplicates`). -Experimenting with various configurations may lead to better performance for +Experimenting with various configurations may lead to better performance for your use case. ### I have a question! Where can I get help? -Do you have any question related to writing RML mapping rules, -the RML specification, etc., feel free to ask them -here: https://github.com/kg-construct/rml-questions ! -If you have found a bug or need a feature for the RMLMapper itself, +Do you have any question related to writing RML mapping rules, +the RML specification, etc., feel free to ask them +here: https://github.com/kg-construct/rml-questions ! +If you have found a bug or need a feature for the RMLMapper itself, you can make an issue in this repository. ## Documentation + Generate static files at /docs/apidocs with: -``` + +```bash mvn javadoc:javadoc ``` ### UML Diagrams #### Architecture UML Diagram + ##### How to generate with IntelliJ IDEA + (Requires Ultimate edition) -* Right click on package: "be.ugent.rml" -* Diagrams > Show Diagram > Java Class Diagrams -* Choose what properties of the classes you want to show in the upper left corner -* Export to file > .png | Save diagram > .uml +- Right click on package: "be.ugent.rml" +- Diagrams > Show Diagram > Java Class Diagrams +- Choose what properties of the classes you want to show in the upper left corner +- Export to file > .png | Save diagram > .uml #### Sequence Diagram + ##### Edit on [draw.io](https://www.draw.io) -* Go to [draw.io](https://www.draw.io) -* Click on 'Open Existing Diagram' and choose the .html file + +- Go to [draw.io](https://www.draw.io) +- Click on 'Open Existing Diagram' and choose the .html file [1]: A. Dimou, T. De Nies, R. Verborgh, E. Mannens, P. Mechant, and R.
Van de Walle, “Automated metadata generation for linked data generation and publishing workflows,” in Proceedings of the 9th Workshop on Linked Data on the Web, Montreal, Canada, 2016, pp. 1–10. [PDF](http://events.linkeddata.org/ldow2016/papers/LDOW2016_paper_04.pdf) diff --git a/RELEASE.md b/RELEASE.md index 26820244..72f1ab46 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,21 @@ # Release process -## Release branch and tags +## Automated + +1. Go to Gitlab's Pipelines in the repository. +2. Click on the grey job with an arrow in it (`development` branch). +3. Click on 'Create Release' manual job of the pipeline, not on the play button! +4. Enter the following key-value variable: + - Key: `RELEASE_TAG_NAME` + - Value: The git tag you want, for example: `1.2.3` +5. Press 'Run job' + +All release steps are automatically executed including syncing the `master` branch, git tags, Maven, Docker, etc. +Once the job completes, manually make a GitHub Release from the tag you created on GitHub. + +## Manually + +### Release branch and tags 1. Make a new release branch (named `release/X.Y.Z`) 2. Bump version number in `pom.xml` @@ -11,13 +26,13 @@ 7. Create a git tag: `git tag $TAG` on `master`. 8. Push tag: `git push --tags` -## Docker image +### Docker image 1. Run `docker build -t rmlio/rmlmapper-java:$TAG .` to generate a Docker image for your `$TAG`. 2. Repeat this for the `latest` tag: `docker build -t rmlio/rmlmapper-java:latest` 3. Push Docker images to Docker Hub: `docker push rmlio/rmlmapper-java:$TAG` and `docker push rmlio/rmlmapper-java:latest`. -## Deploy on Central Repository +### Deploy on Central Repository The following steps deploy a new version to the Central Repository, based on [this tutorial](https://central.sonatype.org/pages/apache-maven.html). @@ -30,7 +45,7 @@ copy `settings.example.xml` to `~/.m2/settings.xml`. 5. Make sure `JAVA_HOME` is properly set for your setup. Example: `export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64` 6.
Deploy the latest release via `mvn clean deploy -P release -DskipTests=true`. -## Create a release on Github +### Create a release on Github 1. Update the `master` and `development` branches on Github. 2. Go to the Github repo and make a new release. @@ -38,7 +53,7 @@ copy `settings.example.xml` to `~/.m2/settings.xml`. 4. Add the content of `CHANGELOG.md` in the release notes 5. Append the fat jar you generated earlier as release binaries -## Create a Merge Request to Alpine Linux's aports +### Create a Merge Request to Alpine Linux's aports 1. Make sure you have `pmbootstrap` installed and ran `pmbootstrap init`, see https://wiki.postmarketos.org/wiki/Installing_pmbootstrap#Installing_automatically 3. Fork and clone aports: https://gitlab.alpinelinux.org/alpine/aports @@ -51,7 +66,7 @@ copy `settings.example.xml` to `~/.m2/settings.xml`. 10. Push to your fork and create a Merge Request in Alpine Linux's Gitlab. 11. If the CI properly passes, maintainers will merge it in the next few hours or days. -## Re-run the R2RML implementation report test cases +### Re-run the R2RML implementation report test cases 1. Re-run them for this `$TAG` 2. 
Make a merge request to the `rmlio` website diff --git a/buildNumber.properties b/buildNumber.properties index ad7e6c73..a30686cb 100644 --- a/buildNumber.properties +++ b/buildNumber.properties @@ -1,3 +1,3 @@ #maven.buildNumber.plugin properties file -#Tue Feb 01 10:31:54 GMT 2022 -buildNumber0=360 +#Thu Dec 11 14:28:34 CET 2025 +buildNumber0=379 diff --git a/get-changes.sh b/get-changes.sh deleted file mode 100755 index 88ab054e..00000000 --- a/get-changes.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -FOUND_CHANGES=false -cat CHANGELOG.md | while read line; do - # Detect end of new changes - if [[ "$line" == "## "* ]] && [[ "$FOUND_CHANGES" == true ]]; then - exit 0 - fi - - # Print new changes - if [[ $FOUND_CHANGES == true ]]; then - echo "$line" - fi - - # Detect start of new changes - if [[ "$line" == "## Unreleased"* ]]; then - FOUND_CHANGES=true - fi -done diff --git a/pom.xml b/pom.xml index bbb7d810..3defbb85 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ be.ugent.rml rmlmapper RMLMapper - 4.15.0 + 8.1.0 The RMLMapper executes RML rules to generate high quality Linked Data from multiple originally (semi-)structured data sources. 
@@ -35,9 +35,12 @@ UTF-8 - 4.13.2 - 8 - 8 + 5.10.0 + 21 + 21 + 5.2.0 + 3.2.0 + 2.0.2 @@ -56,14 +59,28 @@ - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - - + + no-buildnumber + + 0 + + + + + org.codehaus.mojo + buildnumber-maven-plugin + 1.4 + + + buildnumber-generation + none + + + + + + release @@ -71,7 +88,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.1.0 + 3.5.0 8 @@ -92,7 +109,7 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 + 3.3.0 attach-sources @@ -105,7 +122,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.5 + 3.1.0 sign-artifacts @@ -120,44 +137,95 @@ - + ch.qos.logback logback-classic - 1.2.3 + 1.5.16 + + - commons-lang - commons-lang - 2.6 + commons-io + commons-io + 2.18.0 + + + org.apache.commons + commons-compress + 1.27.1 + + + + org.apache.commons + commons-lang3 + 3.18.0 + + commons-cli commons-cli - 1.4 + 1.10.0 + + org.eclipse.rdf4j - rdf4j-runtime - 2.5.5 + rdf4j-client + 5.1.0 + pom + + + commons-io + commons-io + + + commons-codec + commons-codec + + + com.opencsv + opencsv + + + + + org.junit.jupiter + junit-jupiter-engine + ${junit.version} + test + + + org.junit.jupiter + junit-jupiter-api + ${junit.version} + test - junit - junit + org.junit.jupiter + junit-jupiter-params ${junit.version} test - com.github.stefanbirkner - system-rules - 1.19.0 + org.junit.vintage + junit-vintage-engine + ${junit.version} test com.jayway.jsonpath json-path - 2.6.0 + 2.9.0 + runtime + + + + net.minidev + json-smart + 2.5.2 javax.xml.parsers @@ -165,144 +233,169 @@ 1.4.5 - mysql - mysql-connector-java - 8.0.26 + org.wiremock + wiremock-jetty12 + 3.9.1 + test - ch.vorburger.mariaDB4j - mariaDB4j - 2.4.0 + org.apache.jena + jena-fuseki-main + ${jena.version} test - postgresql - postgresql - 9.1-901-1.jdbc4 + org.rdfhdt + hdt-java-core + 3.0.10 + + + org.apache.jena + * + + + junit + * + + + org.apache.commons + commons-compress + + - - com.microsoft.sqlserver - mssql-jdbc - 7.2.2.jre8 - test + eu.neverblink.jelly + 
jelly-rdf4j + ${jelly.version} + + + org.eclipse.rdf4j + * + + - com.spotify - docker-client - 8.16.0 - test + be.ugent.idlab.knows + function-agent-java + 1.3.0 + + + org.apache.jena + * + + - com.fasterxml.jackson.core - jackson-core - 2.12.1 + com.github.fnoio + grel-functions-java + v0.10.1 + + + org.apache.commons + commons-text + + + commons-codec + commons-codec + + - com.fasterxml.jackson.core - jackson-databind - 2.12.1 + be.ugent.idlab.knows + idlab-functions-java + 1.4.0 + + + org.apache.commons + commons-lang3 + + + org.apache.commons + commons-text + + - com.fasterxml.jackson.core - jackson-annotations - 2.12.1 + be.ugent.idlab.knows + dataio + 2.2.0 + + + org.apache.commons + commons-lang3 + + - - org.eclipse.jetty - jetty-server - 9.4.17.v20190418 - test + + org.bitbucket.b_c + jose4j + 0.9.6 - + - org.eclipse.jetty - jetty-security - 9.4.17.v20190418 + org.testcontainers + testcontainers-postgresql + ${testcontainers.version} test - com.github.tomakehurst - wiremock-jre8 - 2.23.2 + org.testcontainers + testcontainers-mysql + ${testcontainers.version} test - org.apache.jena - apache-jena-libs - pom - 3.8.0 + org.testcontainers + testcontainers-mssqlserver + ${testcontainers.version} test - com.hp.hpl.jena - arq - 2.8.8 + org.testcontainers + testcontainers-oracle-xe + ${testcontainers.version} test - - org.apache.jena - jena-fuseki-embedded - 3.8.0 + org.testcontainers + testcontainers-jdbc + ${testcontainers.version} test - com.github.rdfhdt - hdt-java - v2.1.2 - - - commons-validator - commons-validator - 1.7 - - - com.github.fnoio - grel-functions-java - v0.7.1 - - - com.github.slugify - slugify - 2.5 - - - org.jsoup - jsoup - 1.14.2 - - - - com.opencsv - opencsv - 5.5.2 + org.testcontainers + testcontainers-junit-jupiter + ${testcontainers.version} + test + - org.apache.poi - poi-ooxml - 4.1.0 + org.json + json + 20240303 - + - - org.apache.odftoolkit - simple-odf - 0.8.2-incubating + org.testcontainers + testcontainers + 
${testcontainers.version} + compile - tools - com.sun + org.apache.commons + commons-compress + + + commons-io + commons-io + + org.apache.commons + commons-lang3 + + - @@ -317,7 +410,7 @@ ${maven.compiler.source} ${maven.compiler.source} - 3.8.1 + 3.13.0 org.apache.maven.plugins @@ -363,10 +456,12 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0-M3 + 3.3.1 true false + 1 + false @@ -377,6 +472,7 @@ 1.4 + buildnumber-generation validate create @@ -391,14 +487,13 @@ - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.7 + org.sonatype.central + central-publishing-maven-plugin + 0.9.0 true - ossrh - https://oss.sonatype.org/ - true + central + true diff --git a/pom.xml.versionsBackup b/pom.xml.versionsBackup new file mode 100644 index 00000000..a13886d9 --- /dev/null +++ b/pom.xml.versionsBackup @@ -0,0 +1,436 @@ + + 4.0.0 + be.ugent.rml + rmlmapper + RMLMapper + 7.1.0 + + The RMLMapper executes RML rules to generate high quality Linked Data from multiple originally (semi-)structured data sources. 
+ + https://github.com/RMLio/rmlmapper-java + + + The MIT License + https://raw.githubusercontent.com/RMLio/rmlmapper-java/master/LICENSE + repo + + + + + pheyvaer + Pieter Heyvaert + pieter.heyvaert@ugent.be + + + bjdmeest + Ben De Meester + ben.demeester@ugent.be + + + andimou + Anastasia Dimou + anastasia.dimou@ugent.be + + + + + UTF-8 + 5.10.0 + 17 + 17 + 4.9.0 + 1.18.3 + + + + scm:git:ssh://git@github.com:RMLio/rmlmapper-java.git + https://github.com/RMLio/rmlmapper-java + + + + + repo.maven.apache.org + https://repo.maven.apache.org/maven2/ + + + jitpack.io + https://jitpack.io + + + + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + + + + no-buildnumber + + 0 + + + + + org.codehaus.mojo + buildnumber-maven-plugin + 1.4 + + + buildnumber-generation + none + + + + + + + + release + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.4.1 + + 8 + + + + attach-javadocs + + jar + + + public + false + none + + + + + + org.apache.maven.plugins + maven-source-plugin + 3.2.1 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.5 + + + sign-artifacts + verify + + sign + + + + + + + + + + + + ch.qos.logback + logback-classic + 1.4.14 + + + org.eclipse.rdf4j + rdf4j-client + 4.3.8 + pom + + + org.mapdb + * + + + + + org.junit.jupiter + junit-jupiter-engine + ${junit.version} + test + + + org.junit.jupiter + junit-jupiter-api + ${junit.version} + test + + + org.junit.jupiter + junit-jupiter-params + ${junit.version} + test + + + org.junit.vintage + junit-vintage-engine + ${junit.version} + test + + + com.jayway.jsonpath + json-path + 2.9.0 + runtime + + + + net.minidev + json-smart + 2.5.0 + + + javax.xml.parsers + jaxp-api + 1.4.5 + + + com.mysql + mysql-connector-j + 8.2.0 + provided + + + org.postgresql + postgresql + 42.6.0 + provided + + + + com.microsoft.sqlserver + mssql-jdbc + 12.4.2.jre11 + provided + + + com.oracle.database.jdbc + ojdbc11 + 23.3.0.23.09 + provided + + + 
com.github.tomakehurst + wiremock-jre8 + 2.35.1 + test + + + commons-fileupload + commons-fileupload + + + + + org.apache.jena + jena-fuseki-main + ${jena.version} + test + + + + org.rdfhdt + hdt-java-core + 3.0.10 + + + org.apache.jena + * + + + junit + * + + + + + be.ugent.idlab.knows + function-agent-java + 1.2.0 + + + org.apache.jena + * + + + + + com.github.fnoio + grel-functions-java + v0.9.1 + + + be.ugent.idlab.knows + idlab-functions-java + 1.3.3 + + + be.ugent.idlab.knows + dataio + 1.3.1 + + + + org.testcontainers + postgresql + ${testcontainers.version} + test + + + org.testcontainers + mysql + ${testcontainers.version} + test + + + org.testcontainers + mssqlserver + ${testcontainers.version} + test + + + org.testcontainers + oracle-xe + ${testcontainers.version} + test + + + org.testcontainers + jdbc + ${testcontainers.version} + test + + + org.testcontainers + junit-jupiter + ${testcontainers.version} + test + + + + + src/main/java + src/test/java + ${project.artifactId}-${project.version}-r${buildNumber} + + + org.apache.maven.plugins + maven-compiler-plugin + + ${maven.compiler.source} + ${maven.compiler.source} + + 3.8.1 + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.1 + + true + r${buildNumber}-all + + + be.ugent.rml.cli.Main + + + + + false + + + + + *:* + + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + make-assembly + package + + shade + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.2.2 + + true + false + 1 + false + + + + + + org.codehaus.mojo + buildnumber-maven-plugin + 1.4 + + + buildnumber-generation + validate + + create + + + + + {0,number} + + buildNumber0 + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.13 + true + + ossrh + https://oss.sonatype.org/ + true + + + + + diff --git a/release.sh b/release.sh new file mode 100755 index 00000000..fb082c38 --- /dev/null +++ b/release.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -e + +TAG=$1 +DEV_BRANCH="development" 
+RELEASE_BRANCH="master" + +if [ -z "$1" ]; then + echo "Supply release tag as 'X.Y.Z'. For example: ./release.sh 1.0.0" + exit 1 +fi + +# Dependencies and branch +echo "Installing dependencies and branch..." + +if ! changefrog --help > /dev/null; then + npm install -g changefrog > /dev/null +fi +git checkout "$DEV_BRANCH" + +# Update CHANGELOG.md +echo "Updating CHANGELOG.md" +changefrog -n "$TAG" > /dev/null + +# Update project version in pom.xml +mvn versions:set -DnewVersion=$TAG + +# Create release commit +echo "Creating git commit and tag" +git add . +git commit -m "release v$TAG" +git tag "v$TAG" + +# Add release commit to master branch +echo "Rebasing $DEV_BRANCH upon $RELEASE_BRANCH" +git checkout "$RELEASE_BRANCH" +git rebase "$DEV_BRANCH" + +# Push to branches +echo "Pushing branches..." +git push origin "$DEV_BRANCH" +git push origin "$RELEASE_BRANCH" +git push --tags origin "$RELEASE_BRANCH" +git checkout "$DEV_BRANCH" + +echo "Done!" diff --git a/src/main/java/be/ugent/rml/Executor.java b/src/main/java/be/ugent/rml/Executor.java index f1586d2a..7abfc8f9 100644 --- a/src/main/java/be/ugent/rml/Executor.java +++ b/src/main/java/be/ugent/rml/Executor.java @@ -1,86 +1,113 @@ package be.ugent.rml; -import be.ugent.rml.functions.FunctionLoader; +import be.ugent.idlab.knows.dataio.access.LocalFileAccess; +import be.ugent.idlab.knows.dataio.access.RemoteFileAccess; +import be.ugent.idlab.knows.dataio.record.Record; +import be.ugent.idlab.knows.functions.agent.Agent; +import be.ugent.knows.idlabFunctions.IDLabFunctions; +import be.ugent.rml.conformer.MappingConformer; import be.ugent.rml.functions.MultipleRecordsFunctionExecutor; import be.ugent.rml.metadata.Metadata; import be.ugent.rml.metadata.MetadataGenerator; -import be.ugent.rml.records.Record; +import be.ugent.rml.records.MarkerRecord; import be.ugent.rml.records.RecordsFactory; -import be.ugent.rml.store.RDF4JStore; -import be.ugent.rml.term.ProvenancedQuad; +import be.ugent.rml.store.Quad; import 
be.ugent.rml.store.QuadStore; -import be.ugent.rml.term.NamedNode; -import be.ugent.rml.term.ProvenancedTerm; -import be.ugent.rml.term.Term; +import be.ugent.rml.store.RDF4JStore; +import be.ugent.rml.term.*; +import be.ugent.rml.termgenerator.TermGenerator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.InputStream; +import java.nio.file.NoSuchFileException; import java.util.*; -import java.util.function.BiConsumer; public class Executor { private static final Logger logger = LoggerFactory.getLogger(Executor.class); - private Initializer initializer; - private HashMap> recordsHolders; + private final Initializer initializer; + private final MappingOptimizer mappingOptimizer; + private final Map> recordsHolders = new HashMap<>(); + /* * this map stores for every Triples Map, which is a Term, * a map with the record index and the record's corresponding subject, which is a ProvenancedTerm. */ - private HashMap> subjectCache; - private QuadStore resultingQuads; - private QuadStore rmlStore; - private HashMap targetStores; - private RecordsFactory recordsFactory; + private final Map>> subjectCache; + private final QuadStore resultingQuads; + private final QuadStore rmlStore; + private final Map targetStores; + private final RecordsFactory recordsFactory; private static int blankNodeCounter; - private HashMap mappings; - private String baseIRI; - private final StrictMode strictMode; + private final Map mappings; - public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, String baseIRI, StrictMode strictMode) throws Exception { - this(rmlStore, recordsFactory, null, null, baseIRI, strictMode); - } + /** + * Indicates whether the data to process contains an End-of-File (EOF) marker. + * If it is not provided, it will be automatically inserted when necessary. + * Don't change unless you're generating LDES and know what you're doing! 
+ */ + private boolean EOFProvidedInData = false; - public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, FunctionLoader functionLoader, String baseIRI, StrictMode strictMode) throws Exception { - this(rmlStore, recordsFactory, functionLoader, null, baseIRI, strictMode); + public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, String baseIRI, StrictMode strictMode, final Agent functionAgent) throws Exception { + this(rmlStore, recordsFactory, null, baseIRI, strictMode, functionAgent); } /** * Defaults to best effort operation. For strict mode, - * use {@link Executor#Executor(QuadStore, RecordsFactory, FunctionLoader, QuadStore, String, StrictMode)} + * use {@link Executor#Executor(QuadStore, RecordsFactory, QuadStore, String, StrictMode, Agent)} + */ + public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, QuadStore resultingQuads, String baseIRI, final Agent functionAgent) throws Exception { + this(rmlStore, recordsFactory, resultingQuads, baseIRI, StrictMode.BEST_EFFORT, functionAgent); + } + + /** + * Call this if the data to process contains a specific End-of-File (EOF) marker. + * If it is not provided, it will be automatically inserted when necessary. + * Don't change unless you're generating LDES and know what you're doing! 
*/ - public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, FunctionLoader functionLoader, QuadStore resultingQuads, String baseIRI) throws Exception { - this(rmlStore, recordsFactory, functionLoader, resultingQuads, baseIRI, StrictMode.BEST_EFFORT); + public void setEOFProvidedInData() { + this.EOFProvidedInData = true; } - public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, FunctionLoader functionLoader, QuadStore resultingQuads, String baseIRI, StrictMode strictMode) throws Exception { - this.initializer = new Initializer(rmlStore, functionLoader); + public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, QuadStore resultingQuads, String baseIRI, StrictMode strictMode, final Agent functionAgent) throws Exception { + this(rmlStore, recordsFactory, resultingQuads, baseIRI, strictMode, functionAgent, null); + } + + public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, QuadStore resultingQuads, String baseIRI, StrictMode strictMode, final Agent functionAgent, Map mappingOptions) throws Exception { + // Convert mapping file to RML if needed. 
+ MappingConformer conformer = new MappingConformer(rmlStore, mappingOptions); + + try { + boolean conversionNeeded = conformer.conform(); + + if (conversionNeeded) { + logger.info("Conversion to RML was needed."); + } + } catch (Exception e) { + logger.error("Failed to make mapping file conformant to RML spec.", e); + } + + this.mappingOptimizer = new MappingOptimizer(rmlStore); + this.rmlStore = mappingOptimizer.optimizeMapping(); + this.initializer = new Initializer(this.rmlStore, functionAgent, baseIRI, strictMode); this.mappings = this.initializer.getMappings(); - this.rmlStore = rmlStore; this.recordsFactory = recordsFactory; - this.baseIRI = baseIRI; - this.strictMode = strictMode; - this.recordsHolders = new HashMap>(); - this.subjectCache = new HashMap>(); - this.targetStores = new HashMap(); + this.subjectCache = new HashMap<>(); + this.targetStores = new HashMap<>(); Executor.blankNodeCounter = 0; // Default store if no Targets are available for a triple - if (resultingQuads == null) { - this.resultingQuads = new RDF4JStore(); - } else { - this.resultingQuads = resultingQuads; - } + this.resultingQuads = Objects.requireNonNullElseGet(resultingQuads, RDF4JStore::new); // Output stores for Targets in Term Maps for (Map.Entry tm: this.mappings.entrySet()) { Mapping mapping = tm.getValue(); - Set targets = new HashSet(); // Subject Map MappingInfo subjectMapInfo = mapping.getSubjectMappingInfo(); - targets.addAll(subjectMapInfo.getTargets()); + Set targets = new HashSet<>(subjectMapInfo.getTargets()); // Predicate, Object and Language Maps for(PredicateObjectGraphMapping pog: mapping.getPredicateObjectGraphMappings()) { @@ -90,72 +117,48 @@ public Executor(QuadStore rmlStore, RecordsFactory recordsFactory, FunctionLoade if(pog.getObjectMappingInfo() != null) { targets.addAll(pog.getObjectMappingInfo().getTargets()); } + if(pog.getGraphMappingInfo() != null) { + targets.addAll(pog.getGraphMappingInfo().getTargets()); + } } - // Graph Map + // Graph Map (only 
subjectGraphMap...) for(MappingInfo g: mapping.getGraphMappingInfos()) { targets.addAll(g.getTargets()); } // Create stores for (Term t: targets) { - logger.debug("Adding target for " + t); + logger.debug("Adding target for {}", t); this.targetStores.put(t, new RDF4JStore()); } } } - public Executor(RDF4JStore rmlStore, RecordsFactory factory, FunctionLoader functionLoader, QuadStore outputStore) throws Exception { - this(rmlStore, factory, functionLoader, outputStore, rmlStore.getBase()); - } - - /* - * Backwards compatibility for the V4.X.X releases. - * This API will be deprecated in the first V5.X.X release in which this API will change to the new one. - */ - @Deprecated - public QuadStore execute(List triplesMaps, boolean removeDuplicates, MetadataGenerator metadataGenerator) throws Exception { - HashMap result = this.executeV5(triplesMaps, removeDuplicates, metadataGenerator); - return result.get(new NamedNode("rmlmapper://legacy.store")); - } - - @Deprecated - public QuadStore executeWithFunction(List triplesMaps, boolean removeDuplicates, BiConsumer pogFunction) throws Exception { - HashMap result = this.executeWithFunctionV5(triplesMaps, removeDuplicates, pogFunction); - return result.get(new NamedNode("rmlmapper://legacy.store")); - } - - @Deprecated - public QuadStore execute(List triplesMaps) throws Exception { - HashMap result = this.executeV5(triplesMaps, false, null); - return result.get(new NamedNode("rmlmapper://legacy.store")); - } - /* * New public API for the V5.X.X. 
releases */ - public HashMap executeV5(List triplesMaps, boolean removeDuplicates, MetadataGenerator metadataGenerator) throws Exception { + public Map execute(List triplesMaps, boolean removeDuplicates, MetadataGenerator metadataGenerator) throws Exception { - BiConsumer pogFunction; + POGFunction pogFunction; if (metadataGenerator != null && metadataGenerator.getDetailLevel().getLevel() >= MetadataGenerator.DETAIL_LEVEL.TRIPLE.getLevel()) { - pogFunction = (subject, pog) -> { - generateQuad(subject, pog.getPredicate(), pog.getObject(), pog.getGraph()); - metadataGenerator.insertQuad(new ProvenancedQuad(subject, pog.getPredicate(), pog.getObject(), pog.getGraph())); + pogFunction = (subject, predicate, object, graph, checkEOFMarker) -> { + if (generateQuad(subject, predicate, object, graph , checkEOFMarker)) { + metadataGenerator.insertQuad(new ProvenancedQuad(subject, predicate, object, graph)); + } }; } else { - pogFunction = (subject, pog) -> { - generateQuad(subject, pog.getPredicate(), pog.getObject(), pog.getGraph()); - }; + pogFunction = this::generateQuad; } - return executeWithFunctionV5(triplesMaps, removeDuplicates, pogFunction); + return executeWithFunction(triplesMaps, removeDuplicates, pogFunction); } - public HashMap executeWithFunctionV5(List triplesMaps, boolean removeDuplicates, BiConsumer pogFunction) throws Exception { + public Map executeWithFunction(List triplesMaps, boolean removeDuplicates, POGFunction pogFunction) throws Exception { //check if TriplesMaps are provided if (triplesMaps == null || triplesMaps.isEmpty()) { - triplesMaps = this.initializer.getTriplesMaps(); + triplesMaps = this.getTriplesMaps(); } //we execute every mapping @@ -166,73 +169,32 @@ public HashMap executeWithFunctionV5(List triplesMaps, bo for (int j = 0; j < records.size(); j++) { Record record = records.get(j); - ProvenancedTerm subject = getSubject(triplesMap, mapping, record, j); - - // If we have subject and it's a named node, - // we validate it and make it 
an absolute IRI if needed. - if (subject != null && subject.getTerm() instanceof NamedNode) { - String iri = subject.getTerm().getValue(); - - // Is the IRI valid? - if (!Utils.isValidIRI(iri)) { - if (strictMode.equals(StrictMode.STRICT)) { - throw new Exception("The subject \"" + iri + "\" is not a valid IRI."); - } else { - logger.error("The subject \"" + iri + "\" is not a valid IRI. Skipped."); - subject = null; - } - - // Is the IRI relative? - } else if (Utils.isRelativeIRI(iri)) { + List subjects = getSubject(triplesMap, mapping, record, j); - // Check the base IRI to see if we can use it to turn the IRI into an absolute one. - if (this.baseIRI == null) { - logger.error("The base IRI is null, so relative IRI of subject cannot be turned in to absolute IRI. Skipped."); - subject = null; - } else { - logger.debug("The IRI of subject is made absolute via base IRI."); - iri = this.baseIRI + iri; - - // Check if the new absolute IRI is valid. - if (Utils.isValidIRI(iri)) { - subject = new ProvenancedTerm(new NamedNode(iri), subject.getMetadata(), subject.getTargets()); - } else { - if (strictMode.equals(StrictMode.STRICT)) { - throw new Exception("The subject \"" + iri + "\" is not a valid IRI."); - } else { - logger.error("The subject \"" + iri + "\" is not a valid IRI. Skipped."); - } - } - } - } + if (subjects != null) { + generatePredicateObjectsForSubjects(subjects, mapping, record, pogFunction, EOFProvidedInData); } + } - final ProvenancedTerm finalSubject = subject; - - //TODO validate subject or check if blank node - if (subject != null) { - List subjectGraphs = new ArrayList<>(); - - mapping.getGraphMappingInfos().forEach(mappingInfo -> { - List terms = null; - - try { - terms = mappingInfo.getTermGenerator().generate(record); - } catch (Exception e) { - //todo be more nice and gentle - e.printStackTrace(); + if (!EOFProvidedInData) { + // Generate an EOF marker to indicate the end of the data source and run mappings once more (if not provided). 
+ // This is a hack to call implicitDelete a final time, where it then returns the list of deleted records + TermGenerator generator = mapping.getSubjectMappingInfo().getTermGenerator(); + boolean needsEOFMarker = generator.needsEOFMarker(); + if (needsEOFMarker) { + Record record = new MarkerRecord(); + List subjects = new ArrayList<>(); + List nodes = generator.generate(record); + + if (!nodes.isEmpty()) { + List subjectTargets = getAllTargets(mapping.getSubjectMappingInfo(), record); + for (Term node : nodes) { + subjects.add(new ProvenancedTerm(node, null, subjectTargets)); } - - terms.forEach(term -> { - if (!term.equals(new NamedNode(NAMESPACES.RR + "defaultGraph"))) { - subjectGraphs.add(new ProvenancedTerm(term)); - } - }); - }); - - List pogs = this.generatePredicateObjectGraphs(mapping, record, subjectGraphs); - - pogs.forEach(pog -> pogFunction.accept(finalSubject, pog)); + } + // TODO this only works for the constants in the triples map! + // TODO `record` is not really used, we only need to generate a subject here + generatePredicateObjectsForSubjects(subjects, mapping, null, pogFunction, true); } } } @@ -246,75 +208,33 @@ public HashMap executeWithFunctionV5(List triplesMaps, bo return this.targetStores; } - public HashMap executeV5(List triplesMaps) throws Exception { - return this.executeV5(triplesMaps, false, null); + public Map execute(List triplesMaps) throws Exception { + return this.execute(triplesMaps, false, null); } + private boolean generateQuad(ProvenancedTerm subject, ProvenancedTerm predicate, ProvenancedTerm object, ProvenancedTerm graph, boolean checkEOFMarker) { + Term g = null; + Set targets = new HashSet<>(); - private List generatePredicateObjectGraphs(Mapping mapping, Record record, List alreadyNeededGraphs) throws Exception { - ArrayList results = new ArrayList<>(); - - List predicateObjectGraphMappings = mapping.getPredicateObjectGraphMappings(); - - for (PredicateObjectGraphMapping pogMapping : predicateObjectGraphMappings) { - 
ArrayList predicates = new ArrayList<>(); - ArrayList poGraphs = new ArrayList<>(); - poGraphs.addAll(alreadyNeededGraphs); - - if (pogMapping.getGraphMappingInfo() != null && pogMapping.getGraphMappingInfo().getTermGenerator() != null) { - pogMapping.getGraphMappingInfo().getTermGenerator().generate(record).forEach(term -> { - if (!term.equals(new NamedNode(NAMESPACES.RR + "defaultGraph"))) { - poGraphs.add(new ProvenancedTerm(term)); - } - }); + if (subject != null && predicate != null && object != null) { + if (graph != null) { + g = graph.getTerm(); + targets.addAll(graph.getTargets()); } - pogMapping.getPredicateMappingInfo().getTermGenerator().generate(record).forEach(p -> { - predicates.add(new ProvenancedTerm(p, pogMapping.getPredicateMappingInfo())); - }); - - if (pogMapping.getObjectMappingInfo() != null && pogMapping.getObjectMappingInfo().getTermGenerator() != null) { - List objects = pogMapping.getObjectMappingInfo().getTermGenerator().generate(record); - ArrayList provenancedObjects = new ArrayList<>(); - - objects.forEach(object -> { - provenancedObjects.add(new ProvenancedTerm(object, pogMapping.getObjectMappingInfo())); - }); - - if (objects.size() > 0) { - //add pogs - results.addAll(combineMultiplePOGs(predicates, provenancedObjects, poGraphs)); - } - - //check if we are dealing with a parentTriplesMap (RefObjMap) - } else if (pogMapping.getParentTriplesMap() != null) { - List objects; - - //check if need to apply a join condition - if (!pogMapping.getJoinConditions().isEmpty()) { - objects = this.getIRIsWithConditions(record, pogMapping.getParentTriplesMap(), pogMapping.getJoinConditions()); - //this.generateTriples(subject, po.getPredicateGenerator(), objects, record, combinedGraphs); - } else { - objects = this.getAllIRIs(pogMapping.getParentTriplesMap()); - } - - results.addAll(combineMultiplePOGs(predicates, objects, poGraphs)); + if (checkEOFMarker) { + if (subject.getTerm().getValue().contains(IDLabFunctions.MAGIC_MARKER) + || 
subject.getTerm().getValue().contains(IDLabFunctions.MAGIC_MARKER_ENCODED) + || predicate.getTerm().getValue().contains(IDLabFunctions.MAGIC_MARKER) + || predicate.getTerm().getValue().contains(IDLabFunctions.MAGIC_MARKER_ENCODED) + || object.getTerm().getValue().contains(IDLabFunctions.MAGIC_MARKER) + || object.getTerm().getValue().contains(IDLabFunctions.MAGIC_MARKER_ENCODED)) + return false; + + if (g != null && (g.getValue().contains(IDLabFunctions.MAGIC_MARKER) || g.getValue().contains(IDLabFunctions.MAGIC_MARKER_ENCODED))) + return false; } - } - return results; - } - - private void generateQuad(ProvenancedTerm subject, ProvenancedTerm predicate, ProvenancedTerm object, ProvenancedTerm graph) { - Term g = null; - Set targets = new HashSet(); - - if (graph != null) { - g = graph.getTerm(); - targets.addAll(graph.getTargets()); - } - - if (subject != null && predicate != null && object != null) { // Get all possible targets for triple, the Set guarantees that we don't have duplicates targets.addAll(subject.getTargets()); targets.addAll(predicate.getTargets()); @@ -322,7 +242,7 @@ private void generateQuad(ProvenancedTerm subject, ProvenancedTerm predicate, Pr // If we have targets, write to them if (!targets.isEmpty()) { - for(Term t: targets) { + for (Term t: targets) { this.targetStores.get(t).addQuad(subject.getTerm(), predicate.getTerm(), object.getTerm(), g); } } @@ -330,12 +250,16 @@ private void generateQuad(ProvenancedTerm subject, ProvenancedTerm predicate, Pr else { this.resultingQuads.addQuad(subject.getTerm(), predicate.getTerm(), object.getTerm(), g); } + + return true; } + + return false; } private List getIRIsWithConditions(Record record, Term triplesMap, List conditions) throws Exception { - ArrayList goodIRIs = new ArrayList(); - ArrayList> allIRIs = new ArrayList>(); + List goodIRIs = new ArrayList<>(); + List> allIRIs = new ArrayList<>(); for (MultipleRecordsFunctionExecutor condition : conditions) { 
allIRIs.add(this.getIRIsWithTrueCondition(record, triplesMap, condition)); @@ -365,22 +289,21 @@ private List getIRIsWithTrueCondition(Record child, Term triple //iterator over all the records corresponding with @triplesMap List records = this.getRecords(triplesMap); //this array contains all the IRIs that are valid regarding @path and @values - ArrayList iris = new ArrayList(); + List iris = new ArrayList<>(); for (int i = 0; i < records.size(); i++) { Record parent = records.get(i); - HashMap recordsMap = new HashMap<>(); + Map recordsMap = new HashMap<>(); recordsMap.put("child", child); recordsMap.put("parent", parent); Object expectedBoolean = condition.execute(recordsMap); - if (expectedBoolean instanceof Boolean) { - if ((boolean) expectedBoolean) { - ProvenancedTerm subject = this.getSubject(triplesMap, mapping, parent, i); - iris.add(subject); - } + if (Boolean.TRUE.equals(expectedBoolean)) { + List subjects = this.getSubject(triplesMap, mapping, parent, i); + if (subjects != null) + iris.addAll(subjects); } else { logger.warn("The used condition with the Parent Triples Map does not return a boolean."); } @@ -389,19 +312,26 @@ private List getIRIsWithTrueCondition(Record child, Term triple return iris; } - private ProvenancedTerm getSubject(Term triplesMap, Mapping mapping, Record record, int i) throws Exception { + private List getSubject(Term triplesMap, Mapping mapping, Record record, int i) throws Exception { if (!this.subjectCache.containsKey(triplesMap)) { - this.subjectCache.put(triplesMap, new HashMap()); + this.subjectCache.put(triplesMap, new HashMap<>()); } if (!this.subjectCache.get(triplesMap).containsKey(i)) { - List nodes = mapping.getSubjectMappingInfo().getTermGenerator().generate(record); + TermGenerator generator = mapping.getSubjectMappingInfo().getTermGenerator(); + List nodes = generator.generate(record); if (!nodes.isEmpty()) { - //todo: only create metadata when it's required + List subjectTargets = 
getAllTargets(mapping.getSubjectMappingInfo(), record); + List terms = new ArrayList<>(); Metadata meta = new Metadata(triplesMap, mapping.getSubjectMappingInfo().getTerm()); - List targets = mapping.getSubjectMappingInfo().getTargets(); - this.subjectCache.get(triplesMap).put(i, new ProvenancedTerm(nodes.get(0), meta, targets)); + + // TODO: only create metadata when it's required + for (Term node : nodes) { + terms.add(new ProvenancedTerm(node, meta, subjectTargets)); + } + this.subjectCache.get(triplesMap).put(i, terms); + return terms; } } @@ -412,13 +342,13 @@ private List getAllIRIs(Term triplesMap) throws Exception { Mapping mapping = this.mappings.get(triplesMap); List records = getRecords(triplesMap); - ArrayList iris = new ArrayList(); + List iris = new ArrayList<>(); for (int i = 0; i < records.size(); i++) { Record record = records.get(i); - ProvenancedTerm subject = getSubject(triplesMap, mapping, record, i); - - iris.add(subject); + List subjects = getSubject(triplesMap, mapping, record, i); + if (subjects != null) + iris.addAll(subjects); } return iris; @@ -432,24 +362,26 @@ private List getRecords(Term triplesMap) throws Exception { return this.recordsHolders.get(triplesMap); } - public FunctionLoader getFunctionLoader() { - return this.initializer.getFunctionLoader(); - } - private List combineMultiplePOGs(List predicates, List objects, List graphs) { - ArrayList results = new ArrayList<>(); + List results = new ArrayList<>(); if (graphs.isEmpty()) { graphs.add(null); } - predicates.forEach(p -> { - objects.forEach(o -> { - graphs.forEach(g -> { - results.add(new PredicateObjectGraph(p, o, g)); - }); - }); - }); + predicates.forEach( + p -> objects.forEach( + o -> graphs.forEach( + g -> { + if (g != null && g.getTerm().equals(new NamedNode(NAMESPACES.RML2 + "defaultGraph"))) { + results.add(new PredicateObjectGraph(p, o, null)); + } else { + results.add(new PredicateObjectGraph(p, o, g)); + } + } + ) + ) + ); return results; } @@ -462,6 +394,182 @@ 
public static String getNewBlankNodeID() { } public List getTriplesMaps() { - return initializer.getTriplesMaps(); + List withSubjectMaps = rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "subjectMap"), null); + + return withSubjectMaps.stream() + .map(Quad::getSubject) + .filter(subject -> rmlStore.contains(subject, new NamedNode(NAMESPACES.RML2 + "logicalSource"), null)).toList(); + } + + public QuadStore getRMLStore() { + return this.rmlStore; + } + + public Map getTargets(){ + if (this.targetStores.isEmpty()){ + return null; + } + return this.targetStores; + } + + public void verifySources(String basepath, String mappingPath) throws Exception { + for (Term triplesMap : this.getTriplesMaps()) { + List logicalSources = Utils.getObjectsFromQuads(rmlStore.getQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), null)); + Term logicalSource = logicalSources.get(0); + List sources = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "source"), null)); + for (Term source : sources) { + String value = source.getValue(); + if (source instanceof Literal) { + InputStream is; + if (Utils.isRemoteFile(value)) { + is = new RemoteFileAccess(value).getInputStream(); + } else { + try { + is = new LocalFileAccess(value, basepath, ((Literal) source).getDatatype().stringValue()).getInputStream(); + } catch (NoSuchFileException e) { + is = new LocalFileAccess(value, mappingPath, ((Literal) source).getDatatype().stringValue()).getInputStream(); + } + } + is.close(); // close resources. 
+ } + } + } + } + + private void generatePredicateObjectsForSubjects(final List subjects, + final Mapping mapping, + final Record record, + final POGFunction pogFunction, + final boolean checkEOFMarker) throws Exception { + for (ProvenancedTerm subject: subjects) { + //TODO validate subject or check if blank node + if (subject != null) { + List subjectGraphs = new ArrayList<>(); + + mapping.getGraphMappingInfos().forEach(mappingInfo -> { + List terms = null; + + try { + terms = mappingInfo.getTermGenerator().generate(record); + } catch (Exception e) { + //todo be more nice and gentle + logger.error("Could not generate graph term for record {}", record, e); + } + + if (terms != null) { + terms.forEach(term -> { + if (!term.equals(new NamedNode(NAMESPACES.RML2 + "defaultGraph"))) { + List subjectGraphTargets = getAllTargets(mappingInfo, record); + subjectGraphs.add(new ProvenancedTerm(term, null, subjectGraphTargets)); + } + }); + } + }); + + List pogs = new ArrayList<>(); + List predicateObjectGraphMappings = mapping.getPredicateObjectGraphMappings(); + + for (PredicateObjectGraphMapping pogMapping : predicateObjectGraphMappings) { + ArrayList predicates = new ArrayList<>(); + MappingInfo pogGraphMappingInfo = pogMapping.getGraphMappingInfo(); + MappingInfo pogPredicateMappingInfo = pogMapping.getPredicateMappingInfo(); + MappingInfo pogObjectMappingInfo = pogMapping.getObjectMappingInfo(); + + ArrayList poGraphs = new ArrayList<>(subjectGraphs); + + if (pogGraphMappingInfo != null) { + TermGenerator pogGraphGenerator = pogGraphMappingInfo.getTermGenerator(); + if (pogGraphGenerator != null) { + pogGraphGenerator.generate(record).forEach(term -> { + List graphTargets = getAllTargets(pogGraphMappingInfo, record); + poGraphs.add(new ProvenancedTerm(term, null, graphTargets)); + }); + } + } + + /* Predicates */ + if (pogPredicateMappingInfo != null) { + TermGenerator pogPredicateGenerator = pogPredicateMappingInfo.getTermGenerator(); + List predicateTargets = 
getAllTargets(pogPredicateMappingInfo, record); + pogPredicateGenerator.generate(record).forEach(p -> { + Metadata meta = new ProvenancedTerm(p, pogPredicateMappingInfo).getMetadata(); + predicates.add(new ProvenancedTerm(p, meta, predicateTargets)); + }); + } + + /* Objects */ + if (pogObjectMappingInfo != null) { + TermGenerator pogObjectGenerator = pogObjectMappingInfo.getTermGenerator(); + if (pogObjectGenerator != null) { + List objects = pogObjectGenerator.generate(record); + List objectTargets = getAllTargets(pogObjectMappingInfo, record); + List provenancedObjects = new ArrayList<>(); + objects.forEach(object -> { + Metadata meta = new ProvenancedTerm(object, pogObjectMappingInfo).getMetadata(); + provenancedObjects.add(new ProvenancedTerm(object, meta, objectTargets)); + }); + + if (!objects.isEmpty()) { + //add pogs + pogs.addAll(combineMultiplePOGs(predicates, provenancedObjects, poGraphs)); + } + } + + //check if we are dealing with a parentTriplesMap (RefObjMap) + } else if (pogMapping.getParentTriplesMap() != null) { + List objects; + + //check if need to apply a join condition + if (!pogMapping.getJoinConditions().isEmpty()) { + logger.debug("mapping {}'s join conditions are not empty", pogMapping.toString()); + objects = this.getIRIsWithConditions(record, pogMapping.getParentTriplesMap(), pogMapping.getJoinConditions()); + //this.generateTriples(subject, po.getPredicateGenerator(), objects, record, combinedGraphs); + } else { + logger.debug("mapping {}'s join conditions are empty", pogMapping.toString()); + objects = this.getAllIRIs(pogMapping.getParentTriplesMap()); + } + + // Remove targets from objects because they come from another triples map! + // They need to be "cloned" because they are terms that might be cached to use as subject, + // in which case the targets must NOT be removed. 
+ final List objectsWithoutTargets = objects.stream().map(provenancedTerm -> { + List noTargets = Collections.emptyList(); + return new ProvenancedTerm( + provenancedTerm.getTerm(), provenancedTerm.getMetadata(), noTargets); + }).toList(); + + pogs.addAll(combineMultiplePOGs(predicates, objectsWithoutTargets, poGraphs)); + } + } + + pogs.forEach(pog -> pogFunction.generateQuad(subject, pog.getPredicate(), pog.getObject(), pog.getGraph(), checkEOFMarker)); + } + } } -} \ No newline at end of file + + private List getAllTargets(MappingInfo mappingInfo, Record record) { + List allTargets = new ArrayList<>(); + allTargets.addAll(generateTargetsAndAddToTargetStore(mappingInfo, record)); + allTargets.addAll(mappingInfo.getTargets()); + return allTargets; + } + + private List generateTargetsAndAddToTargetStore(MappingInfo mappingInfo, Record record) { + List targetGenerators = mappingInfo.getTargetGenerators(); + List generatedTargets = new ArrayList<>(); + for (TermGenerator targetGenerator : targetGenerators) { + try { + generatedTargets.addAll(targetGenerator.generate(record)); + } catch (Exception e) { + logger.error("Error occurred when generating target", e); + } + } + for (Term generatedTarget : generatedTargets){ + if(!targetStores.containsKey(generatedTarget)){ + targetStores.put(generatedTarget, new RDF4JStore()); + } + } + return generatedTargets; + } + +} diff --git a/src/main/java/be/ugent/rml/Initializer.java b/src/main/java/be/ugent/rml/Initializer.java index 6ee162b4..b5c4ab0c 100644 --- a/src/main/java/be/ugent/rml/Initializer.java +++ b/src/main/java/be/ugent/rml/Initializer.java @@ -1,35 +1,47 @@ package be.ugent.rml; -import be.ugent.rml.functions.FunctionLoader; +import be.ugent.idlab.knows.functions.agent.Agent; +import be.ugent.idlab.knows.functions.agent.AgentFactory; +import be.ugent.rml.store.Quad; import be.ugent.rml.store.QuadStore; import be.ugent.rml.term.NamedNode; import be.ugent.rml.term.Term; -import java.util.ArrayList; import 
java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +/** + * + */ public class Initializer { private final MappingFactory factory; - private QuadStore rmlStore; - private FunctionLoader functionLoader; - private List triplesMaps; - private HashMap mappings; + private final QuadStore rmlStore; + private final List triplesMaps; + private final Map mappings; - public Initializer(QuadStore rmlStore, FunctionLoader functionLoader) throws Exception { + public Initializer(final QuadStore rmlStore, final Agent functionAgent, final String baseIRI, final StrictMode strictMode) throws Exception { this.rmlStore = rmlStore; //we get all the TriplesMaps from the mapping - this.triplesMaps = this.getAllTriplesMaps(); - this.mappings = new HashMap(); + List subjectMapQuads = rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "subjectMap"), null); + subjectMapQuads.addAll(rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "subject"), null)); - if (functionLoader == null) { - this.functionLoader = new FunctionLoader(); - } else { - this.functionLoader = functionLoader; - } + this.triplesMaps = subjectMapQuads.stream().map(Quad::getSubject).collect(Collectors.toList()); + + this.mappings = new HashMap<>(); + + + final Agent initialisedFunctionAgent = functionAgent == null ? 
+ AgentFactory.createFromFnO("fno/functions_idlab.ttl", + "fno/functions_idlab_classes_java_mapping.ttl", + "fno_idlab_old/functions_idlab.ttl", "fno_idlab_old/functions_idlab_classes_java_mapping.ttl", + "functions_grel.ttl", + "grel_java_mapping.ttl") + : functionAgent; - this.factory = new MappingFactory(this.functionLoader); + this.factory = new MappingFactory(initialisedFunctionAgent, baseIRI, strictMode); extractMappings(); } @@ -39,36 +51,7 @@ private void extractMappings() throws Exception { } } - private List getAllTriplesMaps() { - List maps = Utils.getSubjectsFromQuads(this.rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML + "logicalSource"), null)); - - //filter outer Triples Maps that are used for functions - ArrayList temp = new ArrayList<>(); - - for(Term map: maps) { - if (this.rmlStore.getQuads(null, new NamedNode(NAMESPACES.FNML + "functionValue"), map).isEmpty()) { - temp.add(map); - } - } - - maps = temp; - - if (maps.isEmpty()) { - throw new Error("No Triples Maps found. 
The mapping document you should at least have one Triples Map."); - } else { - return maps; - } - } - - public HashMap getMappings() { + public Map getMappings() { return this.mappings; } - - public List getTriplesMaps() { - return this.triplesMaps; - } - - public FunctionLoader getFunctionLoader() { - return this.functionLoader; - } } diff --git a/src/main/java/be/ugent/rml/MappingFactory.java b/src/main/java/be/ugent/rml/MappingFactory.java index c805e4ca..fe7631d2 100644 --- a/src/main/java/be/ugent/rml/MappingFactory.java +++ b/src/main/java/be/ugent/rml/MappingFactory.java @@ -1,8 +1,11 @@ package be.ugent.rml; +import be.ugent.idlab.knows.functions.agent.Agent; import be.ugent.rml.extractor.ConstantExtractor; +import be.ugent.rml.extractor.HashExtractor; import be.ugent.rml.extractor.ReferenceExtractor; import be.ugent.rml.functions.*; +import be.ugent.rml.store.Quad; import be.ugent.rml.store.QuadStore; import be.ugent.rml.term.Literal; import be.ugent.rml.term.NamedNode; @@ -21,10 +24,11 @@ import java.util.Map; import java.util.function.BiConsumer; +import static be.ugent.rml.Utils.getObjectsFromQuads; import static be.ugent.rml.Utils.isValidrrLanguage; public class MappingFactory { - private final FunctionLoader functionLoader; + private final Agent functionAgent; private MappingInfo subjectMappingInfo; private List graphMappingInfos; private Term triplesMap; @@ -33,10 +37,24 @@ public class MappingFactory { // This boolean is true when the double in a reference need to be ignored. // For example, when accessing data in a RDB. private boolean ignoreDoubleQuotes; + + // Base IRI to prepend to a relative IRI to make it absolute. + private final String baseIRI; + + // check on logical source is need on more than one place, so better store it + private Term logicalSource; + + // StrictMode determines RMLMapper's behaviour when an IRI for a NamedNode is invalid. + // If set to BEST_EFFORT, RMLMapper will not generate a NamedNode and go on. 
+ // If set to STRICT, RMLMapper will stop execution with an exception. + private final StrictMode strictMode; + protected Logger logger = LoggerFactory.getLogger(this.getClass()); - public MappingFactory(FunctionLoader functionLoader) { - this.functionLoader = functionLoader; + public MappingFactory(final Agent functionAgent, final String baseIRI, final StrictMode strictMode) { + this.functionAgent = functionAgent; + this.baseIRI = baseIRI; + this.strictMode = strictMode; } public Mapping createMapping(Term triplesMap, QuadStore store) throws Exception { @@ -46,20 +64,27 @@ public Mapping createMapping(Term triplesMap, QuadStore store) throws Exception this.predicateObjectGraphMappings = new ArrayList<>(); this.graphMappingInfos = null; this.ignoreDoubleQuotes = this.areDoubleQuotesIgnored(store, triplesMap); + String triplesMapBaseIRI = this.baseIRI; - parseSubjectMap(); - parsePredicateObjectMaps(); - graphMappingInfos = parseGraphMapsAndShortcuts(subjectMappingInfo.getTerm()); + /* Override base IRI if specified by the rml:baseIRI predicate in a RML TriplesMap */ + if (!this.store.getQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "baseIRI"), null).isEmpty()) { + triplesMapBaseIRI = this.store.getQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "baseIRI"), null).get(0).getObject().getValue(); + } + + parseSubjectMap(triplesMapBaseIRI); + parsePredicateObjectMaps(triplesMapBaseIRI); + graphMappingInfos = parseGraphMapsAndShortcuts(subjectMappingInfo.getTerm(), triplesMapBaseIRI); //return the mapping return new Mapping(subjectMappingInfo, predicateObjectGraphMappings, graphMappingInfos); } - private void parseSubjectMap() throws Exception { + private void parseSubjectMap(String triplesMapBaseIRI) throws Exception { if (this.subjectMappingInfo == null) { TermGenerator generator; - List subjectmaps = Utils.getObjectsFromQuads(store.getQuads(triplesMap, new NamedNode(NAMESPACES.RR + "subjectMap"), null)); + List subjectmaps = 
getObjectsFromQuads(store.getQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "subject"), null)); + subjectmaps.addAll(getObjectsFromQuads(store.getQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "subjectMap"), null))); if (!subjectmaps.isEmpty()) { if (subjectmaps.size() > 1) { @@ -67,19 +92,19 @@ private void parseSubjectMap() throws Exception { } Term subjectmap = subjectmaps.get(0); - List functionValues = Utils.getObjectsFromQuads(store.getQuads(subjectmap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); - List termTypes = Utils.getObjectsFromQuads(store.getQuads(subjectmap, new NamedNode(NAMESPACES.RR + "termType"), null)); + List functionValues = getObjectsFromQuads(store.getQuads(subjectmap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); + List termTypes = getObjectsFromQuads(store.getQuads(subjectmap, new NamedNode(NAMESPACES.RML2 + "termType"), null)); - if (termTypes.contains(new NamedNode(NAMESPACES.RR + "Literal"))) { + if (termTypes.contains(new NamedNode(NAMESPACES.RML2 + "Literal"))) { throw new Exception(triplesMap + " is a Literal Term Map. 
Accepted term types for Subject Maps are: IRI, Blank Node"); } - boolean isBlankNode = !termTypes.isEmpty() && termTypes.get(0).equals(new NamedNode(NAMESPACES.RR + "BlankNode")); + boolean isBlankNode = !termTypes.isEmpty() && termTypes.get(0).equals(new NamedNode(NAMESPACES.RML2 + "BlankNode")); if (functionValues.isEmpty()) { //checking if we are dealing with a Blank Node as subject if (isBlankNode) { - SingleRecordFunctionExecutor executor = RecordFunctionExecutorFactory.generate(store, subjectmap, true, ignoreDoubleQuotes); + SingleRecordFunctionExecutor executor = RecordFunctionExecutorFactory.generate(store, subjectmap, true, ignoreDoubleQuotes, strictMode.equals(StrictMode.STRICT)); if (executor != null) { generator = new BlankNodeGenerator(executor); @@ -88,25 +113,26 @@ private void parseSubjectMap() throws Exception { } } else { //we are not dealing with a Blank Node, so we create the template - generator = new NamedNodeGenerator(RecordFunctionExecutorFactory.generate(store, subjectmap, true, ignoreDoubleQuotes)); + generator = new NamedNodeGenerator(RecordFunctionExecutorFactory.generate(store, subjectmap, true, ignoreDoubleQuotes, strictMode.equals(StrictMode.STRICT)), triplesMapBaseIRI, strictMode); } } else { - SingleRecordFunctionExecutor functionExecutor = parseFunctionTermMap(functionValues.get(0)); + SingleRecordFunctionExecutor functionExecutor = parseFunctionTermMap(functionValues.get(0), triplesMapBaseIRI); if (isBlankNode) { generator = new BlankNodeGenerator(functionExecutor); } else { - generator = new NamedNodeGenerator(functionExecutor); + generator = new NamedNodeGenerator(functionExecutor, triplesMapBaseIRI, strictMode); } } // get targets for subject - List targets = Utils.getObjectsFromQuads(store.getQuads(subjectmap, new NamedNode(NAMESPACES.RML + "logicalTarget"), null)); - - this.subjectMappingInfo = new MappingInfo(subjectmap, generator, targets); + // get Target Generators for subject + List targets = getTargets(subjectmap); + 
List targetGenerators = getTargetGenerators(subjectmap, strictMode, triplesMapBaseIRI); + this.subjectMappingInfo = new MappingInfo(subjectmap, generator, targets, targetGenerators); //get classes - List classes = Utils.getObjectsFromQuads(store.getQuads(subjectmap, new NamedNode(NAMESPACES.RR + "class"), null)); + List classes = getObjectsFromQuads(store.getQuads(subjectmap, new NamedNode(NAMESPACES.RML2 + "class"), null)); //we create predicateobjects for the classes for (Term c : classes) { @@ -114,8 +140,8 @@ private void parseSubjectMap() throws Exception { * Don't put in graph for rr:class, subject is already put in graph, otherwise double export. * Same holds for targets, the rdf:type triple will be exported to the subject target already. */ - NamedNodeGenerator predicateGenerator = new NamedNodeGenerator(new ConstantExtractor(NAMESPACES.RDF + "type")); - NamedNodeGenerator objectGenerator = new NamedNodeGenerator(new ConstantExtractor(c.getValue())); + NamedNodeGenerator predicateGenerator = new NamedNodeGenerator(new ConstantExtractor(NAMESPACES.RDF + "type"), triplesMapBaseIRI, strictMode); + NamedNodeGenerator objectGenerator = new NamedNodeGenerator(new ConstantExtractor(c.getValue()), triplesMapBaseIRI, strictMode); predicateObjectGraphMappings.add(new PredicateObjectGraphMapping( new MappingInfo(subjectmap, predicateGenerator), new MappingInfo(subjectmap, objectGenerator), @@ -127,20 +153,20 @@ private void parseSubjectMap() throws Exception { } } - private void parsePredicateObjectMaps() throws Exception { - List predicateobjectmaps = Utils.getObjectsFromQuads(store.getQuads(triplesMap, new NamedNode(NAMESPACES.RR + "predicateObjectMap"), null)); + private void parsePredicateObjectMaps(String triplesMapBaseIRI) throws Exception { + List predicateobjectmaps = getObjectsFromQuads(store.getQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "predicateObjectMap"), null)); for (Term pom : predicateobjectmaps) { - List predicateMappingInfos = 
parsePredicateMapsAndShortcuts(pom); - List graphMappingInfos = parseGraphMapsAndShortcuts(pom); + List predicateMappingInfos = parsePredicateMapsAndShortcuts(pom, triplesMapBaseIRI); + List graphMappingInfos = parseGraphMapsAndShortcuts(pom, triplesMapBaseIRI); - parseObjectMapsAndShortcutsAndGeneratePOGGenerators(pom, predicateMappingInfos, graphMappingInfos); + parseObjectMapsAndShortcutsAndGeneratePOGGenerators(pom, predicateMappingInfos, graphMappingInfos, triplesMapBaseIRI); } } - private void parseObjectMapsAndShortcutsAndGeneratePOGGenerators(Term termMap, List predicateMappingInfos, List graphMappingInfos) throws IOException { + private void parseObjectMapsAndShortcutsAndGeneratePOGGenerators(Term termMap, List predicateMappingInfos, List graphMappingInfos, String triplesMapBaseIRI) throws IOException { parseObjectMapsAndShortcutsWithCallback(termMap, (oMappingInfo, childOrParent) -> { - MappingInfo lMappingInfo = parseLanguageMappingInfo(oMappingInfo.getTerm()); + MappingInfo lMappingInfo = parseLanguageMappingInfo(oMappingInfo.getTerm(), triplesMapBaseIRI); predicateMappingInfos.forEach(pMappingInfo -> { if (graphMappingInfos.isEmpty()) { @@ -153,7 +179,7 @@ private void parseObjectMapsAndShortcutsAndGeneratePOGGenerators(Term termMap, L }); }, (parentTriplesMap, joinConditionFunctionExecutors) -> { predicateMappingInfos.forEach(pMappingInfo -> { - List pos = getPredicateObjectGraphMappingFromMultipleGraphMappingInfos(pMappingInfo, null, graphMappingInfos); + List pos = getPredicateObjectGraphMappingFromMultipleGraphMappingInfos(pMappingInfo, null, graphMappingInfos, triplesMapBaseIRI); pos.forEach(pogMappingInfo -> { pogMappingInfo.setParentTriplesMap(parentTriplesMap); @@ -165,27 +191,36 @@ private void parseObjectMapsAndShortcutsAndGeneratePOGGenerators(Term termMap, L predicateObjectGraphMappings.add(pogMappingInfo); }); }); - }); + }, triplesMapBaseIRI); } - private void parseObjectMapsAndShortcutsWithCallback(Term termMap, BiConsumer 
objectMapCallback, BiConsumer> refObjectMapCallback) throws IOException { - List objectmaps = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RR + "objectMap"), null)); + private void parseObjectMapsAndShortcutsWithCallback(Term termMap, BiConsumer objectMapCallback, BiConsumer> refObjectMapCallback, String triplesMapBaseIRI) throws IOException { + List objectmaps = getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "objectMap"), null)); for (Term objectmap : objectmaps) { - parseObjectMapWithCallback(objectmap, objectMapCallback, refObjectMapCallback); + parseObjectMapWithCallback(objectmap, objectMapCallback, refObjectMapCallback, triplesMapBaseIRI); } //dealing with rr:object - List objectsConstants = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RR + "object"), null)); + List objectsConstants = getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "object"), null)); for (Term o : objectsConstants) { TermGenerator gen; SingleRecordFunctionExecutor fn = new ConstantExtractor(o.getValue()); if (o instanceof Literal) { - gen = new LiteralGenerator(fn); + if (((Literal) o).getDatatype() != null) { + Term datatype = new NamedNode(((Literal) o).getDatatype().toString()); + gen = new LiteralGenerator(fn, datatype); + } else if (((Literal) o).getLanguage().isPresent()) { + SingleRecordFunctionExecutor executor = new ConstantExtractor(((Literal) o).getLanguage().get()); + gen = new LiteralGenerator(fn, executor); + } else { + gen = new LiteralGenerator(fn); + } + } else { - gen = new NamedNodeGenerator(fn); + gen = new NamedNodeGenerator(fn, triplesMapBaseIRI, strictMode); } // rr:object shortcut can never have targets @@ -193,24 +228,24 @@ private void parseObjectMapsAndShortcutsWithCallback(Term termMap, BiConsumer objectMapCallback, BiConsumer> refObjectMapCallback) throws IOException { - List functionValues = Utils.getObjectsFromQuads(store.getQuads(objectmap, new 
NamedNode(NAMESPACES.FNML + "functionValue"), null)); + private void parseObjectMapWithCallback(Term objectmap, BiConsumer objectMapCallback, BiConsumer> refObjectMapCallback, String triplesMapBaseIRI) throws IOException { + List functionValues = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); Term termType = getTermType(objectmap, true); - List datatypes = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RR + "datatype"), null)); - List parentTriplesMaps = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RR + "parentTriplesMap"), null)); - List parentTermMaps = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML + "parentTermMap"), null)); + List datatypes = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML2 + "datatype"), null)); + List parentTriplesMaps = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML2 + "parentTriplesMap"), null)); + List parentTermMaps = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML2 + "parentTermMap"), null)); - List languages = getLanguageExecutorsForObjectMap(objectmap); + List languages = getLanguageExecutorsForObjectMap(objectmap, triplesMapBaseIRI); if (functionValues.isEmpty()) { - boolean encodeIRI = termType != null && termType.getValue().equals(NAMESPACES.RR + "IRI"); - SingleRecordFunctionExecutor executor = RecordFunctionExecutorFactory.generate(store, objectmap, encodeIRI, ignoreDoubleQuotes); + boolean encodeIRI = termType != null && termType.getValue().equals(NAMESPACES.RML2 + "IRI"); + SingleRecordFunctionExecutor executor = RecordFunctionExecutorFactory.generate(store, objectmap, encodeIRI, ignoreDoubleQuotes, strictMode.equals(StrictMode.STRICT)); if (parentTriplesMaps.isEmpty() && parentTermMaps.isEmpty()) { TermGenerator oGen; - if (termType.equals(new NamedNode(NAMESPACES.RR + "Literal"))) { + if 
(termType.equals(new NamedNode(NAMESPACES.RML2 + "Literal"))) { //check if we need to apply a datatype to the object if (!datatypes.isEmpty()) { oGen = new LiteralGenerator(executor, datatypes.get(0)); @@ -220,8 +255,8 @@ private void parseObjectMapWithCallback(Term objectmap, BiConsumer languageMaps = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML + "languageMap"), null)); + List languageMaps = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML2 + "languageMap"), null)); - // get targets for object map - List oTargets = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML + "logicalTarget"), null)); + // get targets and targetGenerators for object map + List oTargets = getTargets(objectmap); + List oTargetGenerators = getTargetGenerators(objectmap, strictMode, triplesMapBaseIRI); - objectMapCallback.accept(new MappingInfo(objectmap, oGen, oTargets), "child"); + objectMapCallback.accept(new MappingInfo(objectmap, oGen, oTargets, oTargetGenerators), "child"); } else if (!parentTriplesMaps.isEmpty()) { if (parentTriplesMaps.size() > 1) { - logger.warn(triplesMap + " has " + parentTriplesMaps.size() + " Parent Triples Maps. You can only have one. A random one is taken."); + logger.warn("{} has {} Parent Triples Maps. You can only have one. 
A random one is taken.", triplesMap, parentTriplesMaps.size()); } Term parentTriplesMap = parentTriplesMaps.get(0); - List rrJoinConditions = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RR + "joinCondition"), null)); - List rmljoinConditions = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML + "joinCondition"), null)); - ArrayList joinConditionFunctionExecutors = new ArrayList<>(); + List rmljoinConditions = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML + "joinCondition"), null)); + List joinConditionFunctionExecutors = new ArrayList<>(); + + List joinConditions = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML2 + "joinCondition"), null)); - for (Term joinCondition : rrJoinConditions) { + for (Term joinCondition : joinConditions) { - List parents = Utils.getLiteralObjectsFromQuads(store.getQuads(joinCondition, new NamedNode(NAMESPACES.RR + "parent"), null)); - List childs = Utils.getLiteralObjectsFromQuads(store.getQuads(joinCondition, new NamedNode(NAMESPACES.RR + "child"), null)); + List parents = Utils.getLiteralObjectsFromQuads(store.getQuads(joinCondition, new NamedNode(NAMESPACES.RML2 + "parent"), null)); + List childs = Utils.getLiteralObjectsFromQuads(store.getQuads(joinCondition, new NamedNode(NAMESPACES.RML2 + "child"), null)); if (parents.isEmpty()) { throw new Error("One of the join conditions of " + triplesMap + " is missing rr:parent."); } else if (childs.isEmpty()) { throw new Error("One of the join conditions of " + triplesMap + " is missing rr:child."); } else { - FunctionModel equal = functionLoader.getFunction(new NamedNode("http://example.com/idlab/function/equal")); Map parameters = new HashMap<>(); boolean ignoreDoubleQuotesInParent = this.areDoubleQuotesIgnored(store, parentTriplesMap); - SingleRecordFunctionExecutor parent = new ReferenceExtractor(parents.get(0), ignoreDoubleQuotesInParent); + SingleRecordFunctionExecutor 
parent = new ReferenceExtractor(parents.get(0), ignoreDoubleQuotesInParent, strictMode.equals(StrictMode.STRICT)); Object[] detailsParent = {"parent", parent}; parameters.put("http://users.ugent.be/~bjdmeest/function/grel.ttl#valueParameter", detailsParent); - SingleRecordFunctionExecutor child = new ReferenceExtractor(childs.get(0), ignoreDoubleQuotes); + SingleRecordFunctionExecutor child = new ReferenceExtractor(childs.get(0), ignoreDoubleQuotes, strictMode.equals(StrictMode.STRICT)); Object[] detailsChild = {"child", child}; parameters.put("http://users.ugent.be/~bjdmeest/function/grel.ttl#valueParameter2", detailsChild); - joinConditionFunctionExecutors.add(new StaticMultipleRecordsFunctionExecutor(equal, parameters)); + joinConditionFunctionExecutors.add(new StaticMultipleRecordsFunctionExecutor(parameters, functionAgent, "https://w3id.org/imec/idlab/function#equal")); } } for (Term joinCondition : rmljoinConditions) { - Term functionValue = Utils.getObjectsFromQuads(store.getQuads(joinCondition, new NamedNode(NAMESPACES.FNML + "functionValue"), null)).get(0); - joinConditionFunctionExecutors.add(parseJoinConditionFunctionTermMap(functionValue)); + // TODO fix this for KGC_fnml + Term functionValue = getObjectsFromQuads(store.getQuads(joinCondition, new NamedNode(NAMESPACES.FNML + "functionValue"), null)).get(0); + joinConditionFunctionExecutors.add(parseJoinConditionFunctionTermMap(functionValue, triplesMapBaseIRI)); + } + + // get logical source of parentTriplesMap + List logicalSources = getObjectsFromQuads(store.getQuads(this.triplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), null)); + Term logicalSource = null; + if (!logicalSources.isEmpty()) { + logicalSource = logicalSources.get(0); } + List parentLogicalSources = getObjectsFromQuads(store.getQuads(parentTriplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), null)); + Term parentLogicalSource = null; + if (!parentLogicalSources.isEmpty()) { + parentLogicalSource = 
parentLogicalSources.get(0); + } + // Check if there is at least one Logical Source. if (refObjectMapCallback != null) { refObjectMapCallback.accept(parentTriplesMap, joinConditionFunctionExecutors); } } else if (!parentTermMaps.isEmpty()) { parseObjectMapWithCallback(parentTermMaps.get(0), (objectGenerator, childOrParent) -> { objectMapCallback.accept(objectGenerator, "parent"); - }, null); + }, null, triplesMapBaseIRI); } } else { - SingleRecordFunctionExecutor functionExecutor = parseFunctionTermMap(functionValues.get(0)); + SingleRecordFunctionExecutor functionExecutor = parseFunctionTermMap(functionValues.get(0), triplesMapBaseIRI); TermGenerator gen; //TODO is literal the default? - if (termType == null || termType.equals(new NamedNode(NAMESPACES.RR + "Literal"))) { + if (termType == null || termType.equals(new NamedNode(NAMESPACES.RML2 + "Literal"))) { //check if we need to apply a datatype to the object if (!datatypes.isEmpty()) { gen = new LiteralGenerator(functionExecutor, datatypes.get(0)); @@ -306,30 +356,32 @@ private void parseObjectMapWithCallback(Term objectmap, BiConsumer targets = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML + "logicalTarget"), null)); + // get targets and targetGenerators for object map + List targets = getTargets(objectmap); + List targetGenerators = getTargetGenerators(objectmap, strictMode, triplesMapBaseIRI); + + objectMapCallback.accept(new MappingInfo(objectmap, gen, targets, targetGenerators), "child"); - objectMapCallback.accept(new MappingInfo(objectmap, gen, targets), "child"); } } - private List parseGraphMapsAndShortcuts(Term termMap) throws Exception { - ArrayList graphMappingInfos = new ArrayList<>(); + private List parseGraphMapsAndShortcuts(Term termMap, String triplesMapBaseIRI) throws Exception { + List graphMappingInfos = new ArrayList<>(); - List graphMaps = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RR + "graphMap"), null)); + List graphMaps 
= getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "graphMap"), null)); for (Term graphMap : graphMaps) { - List functionValues = Utils.getObjectsFromQuads(store.getQuads(graphMap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); - List termTypes = Utils.getObjectsFromQuads(store.getQuads(graphMap, new NamedNode(NAMESPACES.RR + "termType"), null)); + List functionValues = getObjectsFromQuads(store.getQuads(graphMap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); + List termTypes = getObjectsFromQuads(store.getQuads(graphMap, new NamedNode(NAMESPACES.RML2 + "termType"), null)); Term termType = null; if (!termTypes.isEmpty()) { termType = termTypes.get(0); - if (termType.equals(new NamedNode(NAMESPACES.RR + "Literal"))) { + if (termType.equals(new NamedNode(NAMESPACES.RML2 + "Literal"))) { throw new Exception("A Graph Map cannot generate literals."); } } @@ -337,10 +389,10 @@ private List parseGraphMapsAndShortcuts(Term termMap) throws Except TermGenerator generator; if (functionValues.isEmpty()) { - SingleRecordFunctionExecutor executor = RecordFunctionExecutorFactory.generate(store, graphMap, true, ignoreDoubleQuotes); + SingleRecordFunctionExecutor executor = RecordFunctionExecutorFactory.generate(store, graphMap, true, ignoreDoubleQuotes, strictMode.equals(StrictMode.STRICT)); - if (termType == null || termType.equals(new NamedNode(NAMESPACES.RR + "IRI"))) { - generator = new NamedNodeGenerator(executor); + if (termType == null || termType.equals(new NamedNode(NAMESPACES.RML2 + "IRI"))) { + generator = new NamedNodeGenerator(executor, triplesMapBaseIRI, strictMode); } else { if (executor == null) { generator = new BlankNodeGenerator(); @@ -349,73 +401,76 @@ private List parseGraphMapsAndShortcuts(Term termMap) throws Except } } } else { - SingleRecordFunctionExecutor functionExecutor = parseFunctionTermMap(functionValues.get(0)); + SingleRecordFunctionExecutor functionExecutor = 
parseFunctionTermMap(functionValues.get(0), triplesMapBaseIRI); - if (termType == null || termType.equals(new NamedNode(NAMESPACES.RR + "IRI"))) { - generator = new NamedNodeGenerator(functionExecutor); + if (termType == null || termType.equals(new NamedNode(NAMESPACES.RML2 + "IRI"))) { + generator = new NamedNodeGenerator(functionExecutor, triplesMapBaseIRI, strictMode); } else { generator = new BlankNodeGenerator(functionExecutor); } } - // get targets for graph maps - List targets = Utils.getObjectsFromQuads(store.getQuads(graphMap, new NamedNode(NAMESPACES.RML + "logicalTarget"), null)); + // get targets and target generators for graph map + List targets = getTargets(graphMap); + List targetGenerators = getTargetGenerators(graphMap, strictMode, triplesMapBaseIRI); + + graphMappingInfos.add(new MappingInfo(termMap, generator, targets, targetGenerators)); - graphMappingInfos.add(new MappingInfo(termMap, generator, targets)); } - List graphShortcuts = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RR + "graph"), null)); + List graphShortcuts = getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "graph"), null)); for (Term graph : graphShortcuts) { String gStr = graph.getValue(); // rr:graph shortcut can never have targets - graphMappingInfos.add(new MappingInfo(termMap, new NamedNodeGenerator(new ConstantExtractor(gStr)))); + graphMappingInfos.add(new MappingInfo(termMap, new NamedNodeGenerator(new ConstantExtractor(gStr), triplesMapBaseIRI, strictMode))); } return graphMappingInfos; } - private List parsePredicateMapsAndShortcuts(Term termMap) throws IOException { - ArrayList predicateMappingInfos = new ArrayList<>(); + private List parsePredicateMapsAndShortcuts(Term termMap, String triplesMapBaseIRI) throws IOException { + List predicateMappingInfos = new ArrayList<>(); - List predicateMaps = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RR + "predicateMap"), null)); + List 
predicateMaps = getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "predicateMap"), null)); for (Term predicateMap : predicateMaps) { // get functionValue for predicate maps - List functionValues = Utils.getObjectsFromQuads(store.getQuads(predicateMap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); + List functionValues = getObjectsFromQuads(store.getQuads(predicateMap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); - // get targets for predicate maps - List targets = Utils.getObjectsFromQuads(store.getQuads(predicateMap, new NamedNode(NAMESPACES.RML + "logicalTarget"), null)); + // get target generators for predicate maps + List targets = getTargets(predicateMap); + List targetGenerators = getTargetGenerators(predicateMap, strictMode, triplesMapBaseIRI); if (functionValues.isEmpty()) { predicateMappingInfos.add(new MappingInfo(predicateMap, - new NamedNodeGenerator(RecordFunctionExecutorFactory.generate(store, predicateMap, false, ignoreDoubleQuotes)), - targets)); + new NamedNodeGenerator(RecordFunctionExecutorFactory.generate(store, predicateMap, false, ignoreDoubleQuotes, strictMode.equals(StrictMode.STRICT)), triplesMapBaseIRI, strictMode), + targets, targetGenerators)); } else { - SingleRecordFunctionExecutor functionExecutor = parseFunctionTermMap(functionValues.get(0)); + SingleRecordFunctionExecutor functionExecutor = parseFunctionTermMap(functionValues.get(0), triplesMapBaseIRI); - predicateMappingInfos.add(new MappingInfo(predicateMap, new NamedNodeGenerator(functionExecutor), targets)); + predicateMappingInfos.add(new MappingInfo(predicateMap, new NamedNodeGenerator(functionExecutor, triplesMapBaseIRI, strictMode), targets, targetGenerators)); } } - List predicateShortcuts = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RR + "predicate"), null)); + List predicateShortcuts = getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "predicate"), null)); for 
(Term predicate : predicateShortcuts) { String pStr = predicate.getValue(); // rr:predicate shortcut can never have targets - predicateMappingInfos.add(new MappingInfo(termMap, new NamedNodeGenerator(new ConstantExtractor(pStr)))); + predicateMappingInfos.add(new MappingInfo(termMap, new NamedNodeGenerator(new ConstantExtractor(pStr), triplesMapBaseIRI, strictMode))); } return predicateMappingInfos; } - private SingleRecordFunctionExecutor parseFunctionTermMap(Term functionValue) throws IOException { - List functionPOMs = Utils.getObjectsFromQuads(store.getQuads(functionValue, new NamedNode(NAMESPACES.RR + "predicateObjectMap"), null)); + private SingleRecordFunctionExecutor parseFunctionTermMap(Term functionValue, String triplesMapBaseIRI) throws IOException { + List functionPOMs = getObjectsFromQuads(store.getQuads(functionValue, new NamedNode(NAMESPACES.RML2 + "predicateObjectMap"), null)); ArrayList params = new ArrayList<>(); for (Term pom : functionPOMs) { - List pMappingInfos = parsePredicateMapsAndShortcuts(pom); - List oMappingInfos = parseObjectMapsAndShortcuts(pom); + List pMappingInfos = parsePredicateMapsAndShortcuts(pom, triplesMapBaseIRI); + List oMappingInfos = parseObjectMapsAndShortcuts(pom, triplesMapBaseIRI); List pGenerators = new ArrayList<>(); pMappingInfos.forEach(mappingInfo -> { @@ -430,15 +485,15 @@ private SingleRecordFunctionExecutor parseFunctionTermMap(Term functionValue) th params.add(new ParameterValuePair(pGenerators, oGenerators)); } - return new DynamicSingleRecordFunctionExecutor(params, functionLoader); + return new DynamicSingleRecordFunctionExecutor(params, functionAgent); } - private MultipleRecordsFunctionExecutor parseJoinConditionFunctionTermMap(Term functionValue) throws IOException { - List functionPOMs = Utils.getObjectsFromQuads(store.getQuads(functionValue, new NamedNode(NAMESPACES.RR + "predicateObjectMap"), null)); + private MultipleRecordsFunctionExecutor parseJoinConditionFunctionTermMap(Term functionValue, 
String triplesMapBaseIRI) throws IOException { + List functionPOMs = getObjectsFromQuads(store.getQuads(functionValue, new NamedNode(NAMESPACES.RML2 + "predicateObjectMap"), null)); ArrayList params = new ArrayList<>(); for (Term pom : functionPOMs) { - List pMappingInfos = parsePredicateMapsAndShortcuts(pom); + List pMappingInfos = parsePredicateMapsAndShortcuts(pom, triplesMapBaseIRI); List pGenerators = new ArrayList<>(); pMappingInfos.forEach(mappingInfo -> { @@ -448,21 +503,40 @@ private MultipleRecordsFunctionExecutor parseJoinConditionFunctionTermMap(Term f ArrayList objectGeneratorOriginPairs = new ArrayList<>(); parseObjectMapsAndShortcutsWithCallback(pom, (oGen, childOrParent) -> { objectGeneratorOriginPairs.add(new TermGeneratorOriginPair(oGen.getTermGenerator(), childOrParent)); - }, null); + }, null, triplesMapBaseIRI); params.add(new ParameterValueOriginPair(pGenerators, objectGeneratorOriginPairs)); } - return new DynamicMultipleRecordsFunctionExecutor(params, functionLoader); + return new DynamicMultipleRecordsFunctionExecutor(params, functionAgent); + } + + /** + * Generate a join condition that only returns true if the same record hash is encountered + * @return + * @throws IOException + */ + private MultipleRecordsFunctionExecutor generateSameLogicalSourceJoinConditionFunctionTermMap() throws IOException { + Map parameters = new HashMap<>(); + + SingleRecordFunctionExecutor parent = new HashExtractor(); + Object[] detailsParent = {"parent", parent}; + parameters.put("http://users.ugent.be/~bjdmeest/function/grel.ttl#valueParameter", detailsParent); + + SingleRecordFunctionExecutor child = new HashExtractor(); + Object[] detailsChild = {"child", child}; + parameters.put("http://users.ugent.be/~bjdmeest/function/grel.ttl#valueParameter2", detailsChild); + + return new StaticMultipleRecordsFunctionExecutor(parameters, functionAgent, "https://w3id.org/imec/idlab/function#equal"); } - private List parseObjectMapsAndShortcuts(Term pom) throws 
IOException { + private List parseObjectMapsAndShortcuts(Term pom, String triplesMapBaseIRI) throws IOException { List mappingInfos = new ArrayList<>(); parseObjectMapsAndShortcutsWithCallback(pom, (mappingInfo, childOrParent) -> { mappingInfos.add(mappingInfo); }, (term, joinConditionFunctions) -> { - }); + }, triplesMapBaseIRI); return mappingInfos; } @@ -470,13 +544,14 @@ private List parseObjectMapsAndShortcuts(Term pom) throws IOExcepti /** * This method returns all executors for the languages of an Object Map. * @param objectmap the object for which the executors need to be determined. + * @param triplesMapBaseIRI the base IRI to use for this Triples Map. * @return a list of executors that return language tags. */ - private List getLanguageExecutorsForObjectMap(Term objectmap) throws IOException { + private List getLanguageExecutorsForObjectMap(Term objectmap, String triplesMapBaseIRI) throws IOException { ArrayList executors = new ArrayList<>(); // Parse rr:language - List languages = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RR + "language"), null)); + List languages = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML2 + "language"), null)); // Validate languages. languages.stream().map(Term::getValue).forEach(language -> {if (! 
isValidrrLanguage(language)) { @@ -488,22 +563,22 @@ private List getLanguageExecutorsForObjectMap(Term } // Parse rml:languageMap - List languageMaps = Utils.getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML + "languageMap"), null)); + List languageMaps = getObjectsFromQuads(store.getQuads(objectmap, new NamedNode(NAMESPACES.RML2 + "languageMap"), null)); for (Term languageMap : languageMaps) { - List functionValues = Utils.getObjectsFromQuads(store.getQuads(languageMap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); + List functionValues = getObjectsFromQuads(store.getQuads(languageMap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); if (functionValues.isEmpty()) { - executors.add(RecordFunctionExecutorFactory.generate(store, languageMap, false, ignoreDoubleQuotes)); + executors.add(RecordFunctionExecutorFactory.generate(store, languageMap, false, ignoreDoubleQuotes, strictMode.equals(StrictMode.STRICT))); } else { - executors.add(parseFunctionTermMap(functionValues.get(0))); + executors.add(parseFunctionTermMap(functionValues.get(0), triplesMapBaseIRI)); } } return executors; } - private MappingInfo parseLanguageMappingInfo(Term objectMap) { + private MappingInfo parseLanguageMappingInfo(Term objectMap, String triplesMapBaseIRI) { // get optional language map targets for object map MappingInfo mappingInfo = null; @@ -511,11 +586,12 @@ private MappingInfo parseLanguageMappingInfo(Term objectMap) { return mappingInfo; } - List languageMaps = Utils.getObjectsFromQuads(store.getQuads(objectMap, new NamedNode(NAMESPACES.RML + "languageMap"), null)); + List languageMaps = getObjectsFromQuads(store.getQuads(objectMap, new NamedNode(NAMESPACES.RML2 + "languageMap"), null)); if (languageMaps.size() == 1) { Term l = languageMaps.get(0); - List lTargets = Utils.getObjectsFromQuads(store.getQuads(l, new NamedNode(NAMESPACES.RML + "logicalTarget"), null)); - mappingInfo = new MappingInfo(l, lTargets); + List lTargets = 
getTargets(l); + List lTargetGenerators = getTargetGenerators(l, strictMode, triplesMapBaseIRI); + mappingInfo = new MappingInfo(l, lTargets, lTargetGenerators); } else if (languageMaps.size() > 1) { logger.warn("Multiple language maps found, a random language map is used"); @@ -528,50 +604,50 @@ else if (languageMaps.size() > 1) { * If no Term Type is found, a default Term Type is return based on the R2RML specification. **/ private Term getTermType(Term map, boolean isObjectMap) { - List termTypes = Utils.getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RR + "termType"), null)); + List termTypes = getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RML2 + "termType"), null)); Term termType = null; if (!termTypes.isEmpty()) { termType = termTypes.get(0); } else { - List constants = Utils.getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RR + "constant"), null)); + List constants = getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RML2 + "constant"), null)); if (!constants.isEmpty()) { Term constant = constants.get(0); if (constant instanceof Literal) { - termType = new NamedNode(NAMESPACES.RR + "Literal"); + termType = new NamedNode(NAMESPACES.RML2 + "Literal"); } else if (constant instanceof NamedNode) { - termType = new NamedNode(NAMESPACES.RR + "IRI"); + termType = new NamedNode(NAMESPACES.RML2 + "IRI"); } else { - termType = new NamedNode(NAMESPACES.RR + "BlankNode"); + termType = new NamedNode(NAMESPACES.RML2 + "BlankNode"); } } else if (isObjectMap) { - boolean hasReference = !Utils.getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RML + "reference"), null)).isEmpty(); - boolean hasFunctionValues = !Utils.getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.FNML + "functionValue"), null)).isEmpty(); - boolean hasLanguage = !Utils.getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RR + "language"), null)).isEmpty() || - 
!Utils.getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RML + "languageMap"), null)).isEmpty(); - boolean hasDatatype = !Utils.getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RR + "datatype"), null)).isEmpty(); + boolean hasReference = !getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RML2 + "reference"), null)).isEmpty(); + boolean hasFunctionValues = !getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.FNML + "functionValue"), null)).isEmpty(); + boolean hasLanguage = !getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RML2 + "language"), null)).isEmpty() || + !getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RML2 + "languageMap"), null)).isEmpty(); + boolean hasDatatype = !getObjectsFromQuads(store.getQuads(map, new NamedNode(NAMESPACES.RML2 + "datatype"), null)).isEmpty(); if (hasReference || hasLanguage || hasDatatype || hasFunctionValues) { - termType = new NamedNode(NAMESPACES.RR + "Literal"); + termType = new NamedNode(NAMESPACES.RML2 + "Literal"); } else { - termType = new NamedNode(NAMESPACES.RR + "IRI"); + termType = new NamedNode(NAMESPACES.RML2 + "IRI"); } } else { - termType = new NamedNode(NAMESPACES.RR + "IRI"); + termType = new NamedNode(NAMESPACES.RML2 + "IRI"); } } return termType; } - private List getPredicateObjectGraphMappingFromMultipleGraphMappingInfos(MappingInfo pMappingInfo, MappingInfo oMappingInfo, List gMappingInfos) { + private List getPredicateObjectGraphMappingFromMultipleGraphMappingInfos(MappingInfo pMappingInfo, MappingInfo oMappingInfo, List gMappingInfos, String triplesMapBaseIRI) { ArrayList list = new ArrayList<>(); MappingInfo lMappingInfo = null; if(oMappingInfo != null) { - lMappingInfo = parseLanguageMappingInfo(oMappingInfo.getTerm()); + lMappingInfo = parseLanguageMappingInfo(oMappingInfo.getTerm(), triplesMapBaseIRI); } for(MappingInfo gMappingInfo: gMappingInfos) { @@ -592,18 +668,18 @@ private List 
getPredicateObjectGraphMappingFromMult * @return true if double quotes should be ignored in references, else false. */ private boolean areDoubleQuotesIgnored(QuadStore store, Term triplesMap) { - List logicalSources = Utils.getObjectsFromQuads(store.getQuads(triplesMap, new NamedNode(NAMESPACES.RML + "logicalSource"), null)); + List logicalSources = getObjectsFromQuads(store.getQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), null)); if (!logicalSources.isEmpty()) { Term logicalSource = logicalSources.get(0); - List sources = Utils.getObjectsFromQuads(store.getQuads(logicalSource, new NamedNode(NAMESPACES.RML + "source"), null)); + List sources = getObjectsFromQuads(store.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "source"), null)); if (!sources.isEmpty()) { Term source = sources.get(0); if (! (sources.get(0) instanceof Literal)) { - List sourceType = Utils.getObjectsFromQuads(store.getQuads(source, new NamedNode(NAMESPACES.RDF + "type"), null)); + List sourceType = getObjectsFromQuads(store.getQuads(source, new NamedNode(NAMESPACES.RDF + "type"), null)); return sourceType.get(0).getValue().equals(NAMESPACES.D2RQ + "Database"); } @@ -612,4 +688,35 @@ private boolean areDoubleQuotesIgnored(QuadStore store, Term triplesMap) { return false; } + + private List getTargetGenerators(Term termMap, StrictMode strictMode, String triplesMapBaseIRI) { + List targetGenerators = new ArrayList<>(); + List logicalTargetMaps = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RMLE + "logicalTargetMap"), null)); + for (Term logicalTargetMap : logicalTargetMaps) { + SingleRecordFunctionExecutor functionExecutor = null; + List functionValues = getObjectsFromQuads(store.getQuads(logicalTargetMap, new NamedNode(NAMESPACES.FNML + "functionValue"), null)); + if (functionValues.isEmpty()) { + //similar to subjects, dynamic targets should always be uri + functionExecutor = RecordFunctionExecutorFactory.generate(store, 
logicalTargetMap, true, ignoreDoubleQuotes, strictMode.equals(StrictMode.STRICT)); + } else { + try { + functionExecutor = parseFunctionTermMap(functionValues.get(0), triplesMapBaseIRI); + } catch (IOException e) { + logger.error("Parsing function term map failed:" + e); + } + } + if (functionValues != null) { + targetGenerators.add(new NamedNodeGenerator(functionExecutor, triplesMapBaseIRI, strictMode)); + } + } + return targetGenerators; + } + + private List getTargets(Term termMap){ + List targets = new ArrayList<>(); + List logicalTargets = getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "logicalTarget"), null)); + targets.addAll(logicalTargets); + return targets; + } + } diff --git a/src/main/java/be/ugent/rml/MappingInfo.java b/src/main/java/be/ugent/rml/MappingInfo.java index 6b948f23..506e6c34 100644 --- a/src/main/java/be/ugent/rml/MappingInfo.java +++ b/src/main/java/be/ugent/rml/MappingInfo.java @@ -3,30 +3,28 @@ import be.ugent.rml.term.Term; import be.ugent.rml.termgenerator.TermGenerator; -import java.util.ArrayList; +import java.util.Collections; import java.util.List; public class MappingInfo { - private Term term; - private TermGenerator termGenerator; - private List targets; + private final Term term; + private final TermGenerator termGenerator; + private final List targets; + private final List targetGenerators; - public MappingInfo(Term term, TermGenerator termGenerator, List targets) { + public MappingInfo(Term term, TermGenerator termGenerator, List targets, List targetGenerators) { this.term = term; this.termGenerator = termGenerator; this.targets = targets; + this.targetGenerators = targetGenerators; } public MappingInfo(Term term, TermGenerator termGenerator) { - this.term = term; - this.termGenerator = termGenerator; - this.targets = new ArrayList(); + this(term, termGenerator, Collections.emptyList(), Collections.emptyList()); } - public MappingInfo(Term term, List targets) { - this.term = term; - 
this.termGenerator = null; - this.targets = targets; + public MappingInfo(Term term, List targets, List targetGenerators) { + this(term, null, targets, targetGenerators); } public Term getTerm() { @@ -41,8 +39,15 @@ public List getTargets() { return targets; } - public List addTargets(List targets) { + public List getTargetGenerators() { + return this.targetGenerators; + } + + public void addTargets(List targets) { this.targets.addAll(targets); - return this.targets; + } + + public void addTargetGenerators(List targetGenerators) { + getTargetGenerators().addAll(targetGenerators); } } diff --git a/src/main/java/be/ugent/rml/MappingOptimizer.java b/src/main/java/be/ugent/rml/MappingOptimizer.java new file mode 100644 index 00000000..70be3760 --- /dev/null +++ b/src/main/java/be/ugent/rml/MappingOptimizer.java @@ -0,0 +1,194 @@ +package be.ugent.rml; + +import be.ugent.rml.extractor.Extractor; +import be.ugent.rml.extractor.ReferenceExtractor; +import be.ugent.rml.store.Quad; +import be.ugent.rml.store.QuadStore; +import be.ugent.rml.term.NamedNode; +import be.ugent.rml.term.Term; + +import java.util.*; +import static be.ugent.rml.Utils.getObjectsFromQuads; + +public class MappingOptimizer { + + private final QuadStore rmlStore; + + public MappingOptimizer(QuadStore rmlStore) { + this.rmlStore = rmlStore; + } + + public QuadStore optimizeMapping() throws Exception { + renameSameLogicalSource(); + eliminateSelfJoins(); + return rmlStore; + } + + private void renameSameLogicalSource() { + List logicalSources = Utils.getObjectsFromQuads(rmlStore.getQuads(null,new NamedNode(NAMESPACES.RML2 + "logicalSource"),null)); + Map, Term> logicalSourcesDict = new HashMap<>(); + for (Term logicalSource : logicalSources){ + // two logical Sources are considered to be identical when they have the same objects at the leaves of their subgraph + Set allObjects = new HashSet<>(); + List objects = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, null, null)); + while 
(!objects.isEmpty()) { + Term object = objects.remove(objects.size()-1); + if (object.isBNode() || object.isIRI()) { + List newObjects = Utils.getObjectsFromQuads(rmlStore.getQuads(object, null, null)); + if (!newObjects.isEmpty()) { + objects.addAll(newObjects); + } else { + //object is final, not subject of new quads + allObjects.add(object); + } + } else { + //object is final, not subject of new quads + allObjects.add(object); + } + } + Set finalObjectSet = Collections.unmodifiableSet(allObjects); + if (!logicalSourcesDict.keySet().contains(finalObjectSet)) { + logicalSourcesDict.put(finalObjectSet, logicalSource); + } else { + List triplesMaps = Utils.getSubjectsFromQuads(this.rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "logicalSource"), logicalSource)); + for (Term triplesMap : triplesMaps) { + rmlStore.removeQuads(triplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), logicalSource); + rmlStore.addQuad(triplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), logicalSourcesDict.get(finalObjectSet)); + } + } + } + } + + private void eliminateSelfJoins() { + List refObjectMapsQuads = rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "parentTriplesMap"), null); + for (Quad refObjectMapQuad : refObjectMapsQuads) { + Term parentTriplesMap = refObjectMapQuad.getObject(); + Term childObjectMap = refObjectMapQuad.getSubject(); + Term parentLogicalSource = Utils.getObjectsFromQuads(rmlStore.getQuads(parentTriplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), null)).get(0); + Term childPredicateObjectMap = Utils.getSubjectsFromQuads(rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "objectMap"), childObjectMap)).get(0); + Term childTriplesMap = Utils.getSubjectsFromQuads(rmlStore.getQuads(null, new NamedNode(NAMESPACES.RML2 + "predicateObjectMap"), childPredicateObjectMap)).get(0); + Term childLogicalSource = Utils.getObjectsFromQuads(rmlStore.getQuads(childTriplesMap, new NamedNode(NAMESPACES.RML2 + "logicalSource"), 
null)).get(0); + + // check if the logical sources are the same + if (childLogicalSource.equals(parentLogicalSource)) { + + List joinConditions = Utils.getObjectsFromQuads(rmlStore.getQuads(childObjectMap, new NamedNode(NAMESPACES.RML2 + "joinCondition"), null)); + + List parentSubjectMaps = Utils.getObjectsFromQuads(rmlStore.getQuads(parentTriplesMap, new NamedNode(NAMESPACES.RML2 + "subjectMap"), null)); + Term parentSubjectMap = null; + if (!parentSubjectMaps.isEmpty()) { + parentSubjectMap = parentSubjectMaps.get(0); + } + + boolean safeSelfJoinElimination = true; + + // if no join condition, we can safely eliminate the self-join + // else we need more checks + if (parentSubjectMap != null && !joinConditions.isEmpty()) { + // we can eliminate a self-join when all join conditions have equal references and all references for the parent subject or all reference for the related child triple come back in the join conditions + // 1. check if all join references are equal + List joinReferences = new ArrayList<>(); + for (Term joinCondition : joinConditions) { + String parent = getObjectsFromQuads(rmlStore.getQuads(joinCondition, new NamedNode(NAMESPACES.RML2 + "parent"), null)).get(0).getValue(); + String child = getObjectsFromQuads(rmlStore.getQuads(joinCondition, new NamedNode(NAMESPACES.RML2 + "child"), null)).get(0).getValue(); + if (child.equals(parent)) { + joinReferences.add(child); + } else { + safeSelfJoinElimination = false; + } + } + if (safeSelfJoinElimination) { + // 2. check if all references for the parent subject come back in the join conditions + boolean safeTerms = hasSafeReferences(parentSubjectMap, joinReferences); + if(!safeTerms) { + // if not all references for the parent subject come back in the join conditions, + // 3. 
check if all references for the related child terms come back in the join conditions + // 3.1 check child subject + List childSubjectMaps = Utils.getObjectsFromQuads(rmlStore.getQuads(parentTriplesMap, new NamedNode(NAMESPACES.RML2 + "subjectMap"), null)); + if(!childSubjectMaps.isEmpty()) { + safeTerms = hasSafeReferences(childSubjectMaps.get(0), joinReferences); + } else { + safeTerms = true; + } + //3.2 check child predicate (only make sense if the child subject was safe, otherwise we cannot eliminate the-self join) + if (safeTerms) { + List childPredicateMaps = Utils.getObjectsFromQuads(rmlStore.getQuads(childPredicateObjectMap, new NamedNode(NAMESPACES.RML2 + "predicateMap"), null)); + if(!childPredicateMaps.isEmpty()) { + safeTerms = hasSafeReferences(childPredicateMaps.get(0), joinReferences); + } + } + } + // 4. if parent subject or all child terms are safe, the self join can be eliminated, else not + if (!safeTerms) { + safeSelfJoinElimination = false; + } + } + } + if (safeSelfJoinElimination) { + // now we rewrite the mapping file to eliminate the self-join + boolean termTypeAdded = false; + List parentSubjectMapQuads = rmlStore.getQuads(parentSubjectMap, null, null); + for (Quad parentSubjectMapQuad : parentSubjectMapQuads) { + Term predicate = parentSubjectMapQuad.getPredicate(); + if (predicate.equals(new NamedNode(NAMESPACES.FNML + "functionValue")) + || predicate.equals(new NamedNode(NAMESPACES.RML2 + "termType")) + || predicate.equals(new NamedNode(NAMESPACES.RML2 + "reference")) + || predicate.equals(new NamedNode(NAMESPACES.RML2 + "template")) + || predicate.equals(new NamedNode(NAMESPACES.RML2 + "constant"))) { + rmlStore.addQuad(childObjectMap, predicate, parentSubjectMapQuad.getObject()); + } + if (predicate.equals(new NamedNode(NAMESPACES.RML2 + "termType"))) { + termTypeAdded = true; + } + } + rmlStore.removeQuads(childObjectMap, new NamedNode(NAMESPACES.RML2 + "parentTriplesMap"), parentTriplesMap); + if (!termTypeAdded) { + 
rmlStore.addQuad(childObjectMap, new NamedNode(NAMESPACES.RML2 + "termType"), new NamedNode(NAMESPACES.RML2 + "IRI")); + } + } + } + } + } + + private Set getAllLinkedReferences(Term term){ + Set references = new HashSet<>(); + List linkedSubjects = new ArrayList<>(); + linkedSubjects.add(term); + while(!linkedSubjects.isEmpty()) { + Term subject = linkedSubjects.get(0); + List linkedQuads = rmlStore.getQuads(subject, null, null); + for (Quad linkedQuad : linkedQuads) { + Term predicate = linkedQuad.getPredicate(); + if (predicate.equals(new NamedNode(NAMESPACES.RML2 + "reference"))) { + references.add(linkedQuad.getObject().getValue()); + } else if (predicate.equals(new NamedNode(NAMESPACES.RML2 + "template"))) { + String template = linkedQuad.getObject().getValue(); + List extractors = Utils.parseTemplate(template, false, false); + for (Extractor extractor : extractors) { + if (extractor instanceof ReferenceExtractor) { + references.add(((ReferenceExtractor) extractor).getReference()); + } + } + } else { + Term object = linkedQuad.getObject(); + if (object.isBNode() || object.isIRI()) { + linkedSubjects.add(object); + } + } + } + linkedSubjects.remove(0); + } + return references; + } + + private boolean hasSafeReferences(Term term, List joinReferences){ + boolean isSafe = true; + Set termReferences = getAllLinkedReferences(term); + for (String parentReference : termReferences){ + if (!joinReferences.contains(parentReference)){ + isSafe = false; + } + } + return isSafe; + } +} diff --git a/src/main/java/be/ugent/rml/MyFileUtils.java b/src/main/java/be/ugent/rml/MyFileUtils.java index 2ac42926..f3655840 100644 --- a/src/main/java/be/ugent/rml/MyFileUtils.java +++ b/src/main/java/be/ugent/rml/MyFileUtils.java @@ -1,17 +1,17 @@ package be.ugent.rml; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.FilenameUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; -import 
java.io.InputStream; -import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.net.URLClassLoader; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; class MyFileUtils { + private static Logger logger = LoggerFactory.getLogger(MyFileUtils.class); /** * @param resource @@ -19,105 +19,29 @@ class MyFileUtils { * @throws IOException */ static File getResourceAsFile(String resource) throws IOException { + logger.debug("Searching for '{}' in resources.", resource); ClassLoader cl = MyFileUtils.class.getClassLoader(); - File file; - FileResource fileResource = new URLClassLoaderFileResource(cl, resource); - try { - file = fileResource.getFile(); - } catch (IOException e) { - try { - fileResource = new ClasspathResourceFileResource(cl, resource); - file = fileResource.getFile(); - } catch (Exception e2) { - throw new IOException(e2); - } + URL resourceUrl = cl.getResource(resource); + logger.debug("default class loader found '{}'", resourceUrl); + if (resourceUrl == null) { + throw new IOException("Resource file " + resource + " doesn't exist"); } - return file; - } - - public interface FileResource { - File getFile() throws IOException; - } - - public static class ClasspathResourceFileResource implements FileResource { - - private ClassLoader cl; - private String resource; - private String extension; - - /** - * @param cl - * @param resource - */ - ClasspathResourceFileResource(ClassLoader cl, String resource) { - this.cl = cl; - this.resource = resource; - this.extension = FilenameUtils.getExtension(resource); - } - - /** - * @return - * @throws IOException - */ - public File getFile() throws IOException { - String suffix = "temp"; + if ("file".equals(resourceUrl.getProtocol())) { + try { - if (this.extension != null) { - suffix += "." 
+ this.extension; + String path = resourceUrl.toURI().getRawPath(); + logger.debug("returning file '{}'", path); + path = URLDecoder.decode(path, StandardCharsets.UTF_8); + return new File(path); + } catch (URISyntaxException e) { + throw new IOException("Unable to get file through class loader: " + cl, e); } - InputStream cpResource = cl.getResourceAsStream(resource); - File tmpFile = File.createTempFile("file", suffix); - FileUtils.copyInputStreamToFile(cpResource, tmpFile); - tmpFile.deleteOnExit(); - return tmpFile; - } - } - - public static class URLClassLoaderFileResource implements FileResource { + } else { + throw new IOException( + "Unable to get file through class loader: " + cl); - private ClassLoader cl; - private String resource; - - /** - * @param cl - * @param resourcePath - */ - URLClassLoaderFileResource(ClassLoader cl, String resourcePath) { - this.cl = cl; - this.resource = resourcePath; } - - /** - * @return - * @throws IOException - */ - public File getFile() throws IOException { - File resourceFile = null; - if (cl instanceof URLClassLoader) { - URLClassLoader urlClassLoader = URLClassLoader.class.cast(cl); - URL resourceUrl = urlClassLoader.findResource(resource); - if (resourceUrl == null) { - throw new IOException("Resource file " + resource + " doesn't exist"); - } - if ("file".equals(resourceUrl.getProtocol())) { - try { - - URI uri = resourceUrl.toURI(); - resourceFile = new File(uri); - } catch (URISyntaxException e) { - throw new IOException("Unable to get file through class loader: " + cl, e); - } - - } - } - if (resourceFile == null) { - throw new IOException( - "Unable to get file through class loader: " + cl); - } - return resourceFile; - } - } /** @@ -132,6 +56,7 @@ public static String getParentPath(Class c, String path) { if (url != null) { path = url.getFile(); + path = URLDecoder.decode(path, StandardCharsets.UTF_8); } File outputFile = new File(path); diff --git a/src/main/java/be/ugent/rml/NAMESPACES.java 
b/src/main/java/be/ugent/rml/NAMESPACES.java index dcf4e63f..bb8900f9 100644 --- a/src/main/java/be/ugent/rml/NAMESPACES.java +++ b/src/main/java/be/ugent/rml/NAMESPACES.java @@ -3,6 +3,8 @@ public class NAMESPACES { public static final String DCAT = "http://www.w3.org/ns/dcat#"; public static final String RML = "http://semweb.mmlab.be/ns/rml#"; + + public static final String RML2 = "http://w3id.org/rml/"; public static final String RR = "http://www.w3.org/ns/r2rml#"; public static final String RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; public static final String FNML = "http://semweb.mmlab.be/ns/fnml#"; @@ -30,4 +32,8 @@ public class NAMESPACES { public static final String RMLT = "http://semweb.mmlab.be/ns/rml-target#"; public static final String FORMATS = "http://www.w3.org/ns/formats/"; public static final String COMP = "http://semweb.mmlab.be/ns/rml-compression#"; + public static final String LDES = "https://w3id.org/ldes#"; + public static final String TREE = "https://w3id.org/tree#"; + public static final String RMLE = "https://w3id.org/imec/rml/ns/extensions#"; + } diff --git a/src/main/java/be/ugent/rml/POGFunction.java b/src/main/java/be/ugent/rml/POGFunction.java new file mode 100644 index 00000000..cfc463a6 --- /dev/null +++ b/src/main/java/be/ugent/rml/POGFunction.java @@ -0,0 +1,8 @@ +package be.ugent.rml; + +import be.ugent.rml.term.ProvenancedTerm; + +@FunctionalInterface +public interface POGFunction { + void generateQuad(ProvenancedTerm subject, ProvenancedTerm predicate, ProvenancedTerm object, ProvenancedTerm graph, boolean checkMagicValue); +} diff --git a/src/main/java/be/ugent/rml/PredicateObjectGraphMapping.java b/src/main/java/be/ugent/rml/PredicateObjectGraphMapping.java index 79929a01..c49fd0be 100644 --- a/src/main/java/be/ugent/rml/PredicateObjectGraphMapping.java +++ b/src/main/java/be/ugent/rml/PredicateObjectGraphMapping.java @@ -25,6 +25,7 @@ public PredicateObjectGraphMapping(MappingInfo predicateMappingInfo, MappingInfo 
// Language Maps are tightly connected to the object, merge the target lists if(this.languageMappingInfo != null) { + this.objectMappingInfo.addTargetGenerators(this.languageMappingInfo.getTargetGenerators()); this.objectMappingInfo.addTargets(this.languageMappingInfo.getTargets()); } } diff --git a/src/main/java/be/ugent/rml/RecordFunctionExecutorFactory.java b/src/main/java/be/ugent/rml/RecordFunctionExecutorFactory.java index d2bf7865..7420b6b3 100644 --- a/src/main/java/be/ugent/rml/RecordFunctionExecutorFactory.java +++ b/src/main/java/be/ugent/rml/RecordFunctionExecutorFactory.java @@ -12,18 +12,23 @@ public class RecordFunctionExecutorFactory { - public static SingleRecordFunctionExecutor generate(QuadStore store, Term termMap, boolean encodeURI, boolean ignoreDoubleQuotes) { - List references = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML + "reference"), null)); - List templates = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RR + "template"), null)); - List constants = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RR + "constant"), null)); + public static SingleRecordFunctionExecutor generate(QuadStore store, Term termMap, boolean encodeURI, boolean ignoreDoubleQuotes, boolean strictReferenceResolution) { + List references = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "reference"), null)); + List templates = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "template"), null)); + List constants = Utils.getObjectsFromQuads(store.getQuads(termMap, new NamedNode(NAMESPACES.RML2 + "constant"), null)); - if (!references.isEmpty()) { - return new ReferenceExtractor(references.get(0).getValue(), ignoreDoubleQuotes); - } else if (!templates.isEmpty()) { - return new ConcatFunction(Utils.parseTemplate(templates.get(0).getValue(), ignoreDoubleQuotes), encodeURI); - } else if (!constants.isEmpty()) { - 
return new ConstantExtractor(constants.get(0).getValue()); - } else { + try { + if (!references.isEmpty()) { + return new ReferenceExtractor(references.get(0).getValue(), ignoreDoubleQuotes, strictReferenceResolution); + } else if (!templates.isEmpty()) { + return new ConcatFunction(Utils.parseTemplate(templates.get(0).getValue(), ignoreDoubleQuotes, strictReferenceResolution), encodeURI); + } else if (!constants.isEmpty()) { + return new ConstantExtractor(constants.get(0).getValue()); + } else { + return null; + } + } catch (Exception e) { + System.err.println("Extracting constant or references failed: " + e.getMessage()); return null; } } diff --git a/src/main/java/be/ugent/rml/Utils.java b/src/main/java/be/ugent/rml/Utils.java index 1c8aaf80..f1723e73 100644 --- a/src/main/java/be/ugent/rml/Utils.java +++ b/src/main/java/be/ugent/rml/Utils.java @@ -8,8 +8,6 @@ import be.ugent.rml.term.Literal; import be.ugent.rml.term.NamedNode; import be.ugent.rml.term.Term; -import com.google.common.escape.Escaper; -import com.google.common.net.UrlEscapers; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; import org.eclipse.rdf4j.common.net.ParsedIRI; @@ -67,15 +65,11 @@ public static Reader getReaderFromLocation(String location, File basePath, Strin } } - public static InputStream getInputStreamFromLocation(String location) throws IOException { - return getInputStreamFromLocation(location, null, ""); - } - public static InputStream getInputStreamFromLocation(String location, File basePath, String contentType) throws IOException { - return getInputStreamFromLocation(location, basePath, contentType, new HashMap()); + return getInputStreamFromLocation(location, basePath, contentType, new HashMap<>()); } - public static InputStream getInputStreamFromLocation(String location, File basePath, String contentType, HashMap headers) throws IOException { + public static InputStream getInputStreamFromLocation(String location, File basePath, String 
contentType, Map headers) throws IOException { if (isRemoteFile(location)) { return getInputStreamFromURL(new URL(location), contentType, headers); } else { @@ -85,13 +79,26 @@ public static InputStream getInputStreamFromLocation(String location, File baseP /** * Get an InputStream from a string. This string is either a path (local or remote) to an RDF file, or a raw RDF text. + * If it's a path, conversion from Windows path separators to UNIX path separators is performed * @param mOptionValue input, either RDF file path or raw RDF text * @return input stream */ public static InputStream getInputStreamFromFileOrContentString(String mOptionValue) { InputStream out; logger.debug("{} mapping file", mOptionValue); - String extension = FilenameUtils.getExtension(mOptionValue); + String extension; + try{ + // will throw illegalArgumentException on a windows NTFS if a ":" is present + // on Windows a : is the identifier of an alternate data stream + extension = FilenameUtils.getExtension(mOptionValue); + } + catch (IllegalArgumentException e){ + return IOUtils.toInputStream(mOptionValue, StandardCharsets.UTF_8); + } + if (extension != null) { + // Windows paths 🤷‍♂️ + mOptionValue = mOptionValue.replaceAll("\\\\", "/"); + } try { switch (extension) { case "n3": @@ -121,29 +128,28 @@ public static InputStream getInputStreamFromFileOrContentString(String mOptionVa } } catch (IOException e) { logger.info("Trying to read mapping as raw input string."); - try { - // raw mapping input string - out = IOUtils.toInputStream(mOptionValue, "UTF-8"); - } catch (IOException e2) { - logger.error("Cannot read mapping option {}", mOptionValue); - out = new ByteArrayInputStream(new byte[0]); - } + out = IOUtils.toInputStream(mOptionValue, StandardCharsets.UTF_8); } return out; } private static InputStream getTurtleInputStreamForFormat(String mOptionValue, RDFFormat format) throws IOException { - InputStream out = getInputStreamFromLocation(mOptionValue, null, format.getDefaultMIMEType()); 
- Model model = Rio.parse(out, "", format); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - Rio.write(model, output, RDFFormat.TURTLE); - return new ByteArrayInputStream(output.toByteArray()); + try (InputStream out = getInputStreamFromLocation(mOptionValue, null, format.getDefaultMIMEType())) { + Model model = Rio.parse(out, "", format); + ByteArrayOutputStream output = new ByteArrayOutputStream(); + Rio.write(model, output, RDFFormat.TURTLE); + return new ByteArrayInputStream(output.toByteArray()); + } } public static File getFile(String path) throws IOException { return Utils.getFile(path, null); } + /** + * Get path based on basePath or (if not filled in) the user.dir + * This file assumes UNIX path separators. + */ public static File getFile(String path, File basePath) throws IOException { // Absolute path? File f = new File(path); @@ -163,7 +169,7 @@ public static File getFile(String path, File basePath) throws IOException { } } - logger.debug("Looking for file " + path + " in basePath " + basePath); + logger.debug("Looking for file {} in basePath {}", path, basePath); // Relative from user dir? f = new File(basePath, path); @@ -171,8 +177,8 @@ public static File getFile(String path, File basePath) throws IOException { return f; } - logger.debug("File " + path + " not found in " + basePath); - logger.debug("Looking for file " + path + " in " + basePath + "/../"); + logger.debug("File {} not found in {}", path, basePath); + logger.debug("Looking for file {} in {} /../", path, basePath); // Relative from parent of user dir? @@ -181,8 +187,9 @@ public static File getFile(String path, File basePath) throws IOException { return f; } - logger.debug("File " + path + " not found in " + basePath); - logger.debug("Looking for file " + path + " in the resources directory"); + logger.debug("File {} not found in {}", path, basePath); + + logger.debug("Looking for file {} in the resources directory", path); // Resource path? 
try { @@ -191,7 +198,7 @@ public static File getFile(String path, File basePath) throws IOException { // Too bad } - logger.debug("File " + path + " not found in the resources directory"); + logger.debug("File {} not found in the resources directory", path); throw new FileNotFoundException(path); } @@ -229,7 +236,7 @@ public static InputStream getInputStreamFromURL(URL url, String contentType) { return inputStream; } - public static InputStream getInputStreamFromURL(URL url, String contentType, HashMap headers) { + public static InputStream getInputStreamFromURL(URL url, String contentType, Map headers) { InputStream inputStream = null; try { HttpURLConnection connection = (HttpURLConnection) url.openConnection(); @@ -243,11 +250,43 @@ public static InputStream getInputStreamFromURL(URL url, String contentType, Has } // Apply all headers headers.forEach((name, value) -> { - logger.debug(name + ": " + value); + logger.debug("{}: {}", name, value); connection.setRequestProperty(name, value); }); + logger.debug("trying to connect"); connection.connect(); + logger.debug("getting inputstream"); inputStream = connection.getInputStream(); + logger.debug("got inputstream"); + } catch (IOException ex) { + ex.printStackTrace(); + } + return inputStream; + } + + public static InputStream getInputStreamFromAuthURL(URL url, String contentType, Map headers) throws Exception { + InputStream inputStream = null; + try { + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setDoOutput(true); + connection.setInstanceFollowRedirects(true); + connection.setRequestMethod("GET"); + connection.setRequestProperty("Accept", contentType); + // Set encoding if not set before + if(!headers.containsKey("charset")) { + headers.put("charset", "utf-8"); + } + // Apply all headers + headers.forEach((name, value) -> { + logger.debug("{}: {}", name, value); + connection.setRequestProperty(name, value); + }); + logger.debug("trying to connect"); + 
connection.connect(); + if(connection.getResponseCode() == 401) throw new Exception("not authenticated"); + logger.debug("getting inputstream"); + inputStream = connection.getInputStream(); + logger.debug("got inputstream"); } catch (IOException ex) { ex.printStackTrace(); } @@ -258,12 +297,33 @@ public static InputStream getInputStreamFromFile(File file) throws FileNotFoundE return new FileInputStream(file); } + public static InputStream getPostRequestResponse(URL url, String contentType, byte[] auth ){ + InputStream inputStream = null; + Map headers = new HashMap<>(); + headers.put("charset", "utf-8"); + + try { + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setDoOutput(true); + connection.setInstanceFollowRedirects(true); + connection.setRequestMethod("POST"); + connection.setRequestProperty("Accept", contentType); + connection.connect(); + OutputStream outputStream = connection.getOutputStream(); + outputStream.write(auth); + inputStream = connection.getInputStream(); + } catch (IOException ex) { + ex.printStackTrace(); + } + return inputStream; + } + public static boolean isRemoteFile(String location) { return location.startsWith("https://") || location.startsWith("http://"); } public static List getSubjectsFromQuads(List quads) { - ArrayList subjects = new ArrayList<>(); + List subjects = new ArrayList<>(); for (Quad quad : quads) { subjects.add(quad.getSubject()); @@ -273,7 +333,7 @@ public static List getSubjectsFromQuads(List quads) { } public static List getObjectsFromQuads(List quads) { - ArrayList objects = new ArrayList<>(); + List objects = new ArrayList<>(); for (Quad quad : quads) { objects.add(quad.getObject()); @@ -283,7 +343,7 @@ public static List getObjectsFromQuads(List quads) { } public static List getLiteralObjectsFromQuads(List quads) { - ArrayList objects = new ArrayList<>(); + List objects = new ArrayList<>(); for (Quad quad : quads) { objects.add(((Literal) quad.getObject()).getValue()); @@ 
-326,29 +386,20 @@ public static boolean isValidrrLanguage(String s) { } public static String encodeURI(String url) { - Escaper escaper = UrlEscapers.urlFragmentEscaper(); - String result = escaper.escape(url); - - result = result.replaceAll("!", "%21"); - result = result.replaceAll("#", "%23"); - result = result.replaceAll("\\$", "%24"); - result = result.replaceAll("&", "%26"); - result = result.replaceAll("'", "%27"); - result = result.replaceAll("\\(", "%28"); - result = result.replaceAll("\\)", "%29"); - result = result.replaceAll("\\*", "%2A"); - result = result.replaceAll("\\+", "%2B"); - result = result.replaceAll(",", "%2C"); - result = result.replaceAll("/", "%2F"); - result = result.replaceAll(":", "%3A"); - result = result.replaceAll(";", "%3B"); - result = result.replaceAll("=", "%3D"); - result = result.replaceAll("\\?", "%3F"); - result = result.replaceAll("@", "%40"); - result = result.replaceAll("\\[", "%5B"); - result = result.replaceAll("]", "%5D"); - - return result; + /* Avoid using regex to escape + and * chars for performance */ + final StringBuilder builder = new StringBuilder(); + final String encoded = URLEncoder.encode(url, StandardCharsets.UTF_8); + + for (char c: encoded.toCharArray()) { + if (c == '+') + builder.append("%20"); + else if (c == '*') + builder.append("%2A"); + else + builder.append(c); + } + + return builder.toString(); } public static String fileToString(File file) throws IOException { @@ -429,8 +480,8 @@ public static int getFreePortNumber() throws IOException { * @param template template string * @return list of extractors **/ - public static List parseTemplate(String template, boolean ignoreDoubleQuotes) { - ArrayList extractors = new ArrayList<>(); + public static List parseTemplate(String template, boolean ignoreDoubleQuotes, boolean strictReferenceResolution) { + List extractors = new ArrayList<>(); String current = ""; boolean previousWasBackslash = false; boolean variableBusy = false; @@ -458,7 +509,7 @@ public 
static List parseTemplate(String template, boolean ignoreDoubl current += c; previousWasBackslash = false; } else if (variableBusy) { - extractors.add(new ReferenceExtractor(current, ignoreDoubleQuotes)); + extractors.add(new ReferenceExtractor(current, ignoreDoubleQuotes, strictReferenceResolution)); current = ""; variableBusy = false; } else { @@ -551,26 +602,48 @@ public static boolean isRelativeIRI(String iri) { } } - public static String getBaseDirectiveTurtle(File file) { - StringBuilder contentBuilder = new StringBuilder(); - try (Stream stream = Files.lines(Paths.get(file.getAbsolutePath()), StandardCharsets.UTF_8)) { - stream.forEach(s -> contentBuilder.append(s).append("\n")); - } catch (IOException e) { - e.printStackTrace(); + public static boolean checkPathParent(String path, String base) { + File f; + File basePath; + if (base == null) { + f = new File(path); + if (f.isAbsolute()) { + return f.getParentFile().exists(); + } + base = System.getProperty("user.dir"); + } + try { + basePath = new File(base); + } catch (Exception e) { + return false; } - String turtle = contentBuilder.toString(); - return Utils.getBaseDirectiveTurtle(turtle); + logger.info("Looking for parent of file {} in basePath {}", path, basePath); + + // Relative from user dir? 
+ f = new File(basePath, path); + return f.getParentFile().exists(); } - public static String getBaseDirectiveTurtle(InputStream is) { - String turtle = null; + /** + * Get the base directive from a turtle file or return the default base + * @param is - input stream of the turtle file + * @param defaultBase - default base to return if no base directive is found + * @return - base directive or default base + */ + public static String getBaseDirectiveTurtleOrDefault(InputStream is, String defaultBase) { + String turtle; try { turtle = IOUtils.toString(is, StandardCharsets.UTF_8); } catch (IOException e) { turtle = ""; } - return Utils.getBaseDirectiveTurtle(turtle); + + String base = getBaseDirectiveTurtle(turtle); + if (base == null) { + base = defaultBase; + } + return base; } public static String getBaseDirectiveTurtle(String turtle) { diff --git a/src/main/java/be/ugent/rml/ValuedJoinCondition.java b/src/main/java/be/ugent/rml/ValuedJoinCondition.java deleted file mode 100644 index 1b0da49b..00000000 --- a/src/main/java/be/ugent/rml/ValuedJoinCondition.java +++ /dev/null @@ -1,22 +0,0 @@ -package be.ugent.rml; - -import java.util.List; - -public class ValuedJoinCondition { - - private Template path; - private List values; - - public ValuedJoinCondition(Template path, List values) { - this.path = path; - this.values = values; - } - - public Template getPath() { - return path; - } - - public List getValues() { - return values; - } -} diff --git a/src/main/java/be/ugent/rml/access/Access.java b/src/main/java/be/ugent/rml/access/Access.java deleted file mode 100644 index db9a1bf5..00000000 --- a/src/main/java/be/ugent/rml/access/Access.java +++ /dev/null @@ -1,29 +0,0 @@ -package be.ugent.rml.access; - -import java.io.IOException; -import java.io.InputStream; -import java.sql.SQLException; -import java.util.Map; - -/** - * This interface represents the access to a data source. - * For example, a local file, a remote file, a relational database, and so on. 
- */ -public interface Access { - - /** - * This method returns an InputStream for the access. - * @return the InputStream corresponding to the access. - * @throws IOException - */ - InputStream getInputStream() throws IOException, SQLException, ClassNotFoundException; - - /** - * This method returns a map of datatypes. - * References to values are mapped to their datatypes, if available. - * @return map of datatypes. - */ - Map getDataTypes(); - - String getContentType(); -} diff --git a/src/main/java/be/ugent/rml/access/AccessFactory.java b/src/main/java/be/ugent/rml/access/AccessFactory.java index d8ac5242..0af68240 100644 --- a/src/main/java/be/ugent/rml/access/AccessFactory.java +++ b/src/main/java/be/ugent/rml/access/AccessFactory.java @@ -1,18 +1,24 @@ package be.ugent.rml.access; +import be.ugent.idlab.knows.dataio.access.*; import be.ugent.rml.NAMESPACES; import be.ugent.rml.Utils; +import be.ugent.rml.records.ReferenceFormulation; import be.ugent.rml.records.SPARQLResultFormat; +import be.ugent.rml.store.Quad; import be.ugent.rml.store.QuadStore; import be.ugent.rml.term.Literal; import be.ugent.rml.term.NamedNode; import be.ugent.rml.term.Term; -import org.apache.commons.lang.NotImplementedException; +import org.apache.commons.lang3.NotImplementedException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.File; +import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Map; import static be.ugent.rml.Utils.isRemoteFile; @@ -22,15 +28,23 @@ public class AccessFactory { // The path used when local paths are not absolute. 
- private String basePath; - private static final Logger logger = LoggerFactory.getLogger(AccessFactory.class); + private final String basePath; + private final String mappingPath; + final Logger logger = LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME); + + private static final Map REF_FORM_MIMETYPE = Map.of( + NAMESPACES.RML2 + "CSV", "text/csv" + ); /** * The constructor of the AccessFactory. + * * @param basePath the base path for the local file system. + * @param mappingPath the path to the used mapping file. */ - public AccessFactory(String basePath) { + public AccessFactory(String basePath, String mappingPath) { this.basePath = basePath; + this.mappingPath = mappingPath; } /** @@ -40,8 +54,8 @@ public AccessFactory(String basePath) { * @return an Access instance based on the RML rules in rmlStore. */ public Access getAccess(Term logicalSource, QuadStore rmlStore) { - List sources = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML + "source"), null)); - Access access = null; + List sources = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "source"), null)); + Access access; // check if at least one source is available. if (!sources.isEmpty()) { @@ -49,19 +63,37 @@ public Access getAccess(Term logicalSource, QuadStore rmlStore) { // if we are dealing with a literal, // then it's either a local or remote file. 
- if (sources.get(0) instanceof Literal) { - String value = sources.get(0).getValue(); - + if (sources.get(0) instanceof Literal literal) { + String value = literal.getValue(); if (isRemoteFile(value)) { - access = new RemoteFileAccess(value); + Term refForm = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "referenceFormulation"), null)).get(0); + String mimeType = REF_FORM_MIMETYPE.get(refForm.toString()); + access = new RemoteFileAccess(value, mimeType); } else { access = new LocalFileAccess(value, this.basePath); } } else { // if not a literal, then we are dealing with a more complex description. List sourceType = Utils.getObjectsFromQuads(rmlStore.getQuads(source, new NamedNode(NAMESPACES.RDF + "type"), null)); + sourceType.remove(new NamedNode(NAMESPACES.RML2 + "Source")); switch(sourceType.get(0).getValue()) { + case NAMESPACES.RML2 + "RelativePathSource": + case NAMESPACES.RML2 + "FilePath": + String path = Utils.getObjectsFromQuads(rmlStore.getQuads(source, new NamedNode(NAMESPACES.RML2 + "path"), null)).get(0).getValue(); + List rootNodes = rmlStore.getQuads(source, new NamedNode(NAMESPACES.RML2 + "root"), null); + if (rootNodes.isEmpty()) { + access = new LocalFileAccess(path, null); + } else { + String root = Utils.getObjectsFromQuads(rootNodes).get(0).getValue(); + if (root.equals(NAMESPACES.RML2 + "MappingDirectory")) { + access = new LocalFileAccess(path, this.mappingPath); + } else { + access = new LocalFileAccess(path, this.basePath); + } + } + break; + case NAMESPACES.D2RQ + "Database": // RDBs access = getRDBAccess(rmlStore, source, logicalSource); @@ -76,18 +108,18 @@ public Access getAccess(Term logicalSource, QuadStore rmlStore) { } // Get query - List query = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML + "query"), null)); + List query = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "iterator"), null)); if 
(query.isEmpty()) { throw new Error("No SPARQL query found"); } - List referenceFormulations = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML + "referenceFormulation"), null)); + List referenceFormulations = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "referenceFormulation"), null)); // Get result format List resultFormatObject = Utils.getObjectsFromQuads(rmlStore.getQuads(source, new NamedNode(NAMESPACES.SD + "resultFormat"), null)); SPARQLResultFormat resultFormat = getSPARQLResultFormat(resultFormatObject, referenceFormulations); - access = new SPARQLEndpointAccess(resultFormat.getContentType(), endpoint.get(0).getValue(), query.get(0).getValue());; + access = new SPARQLEndpointAccess(resultFormat.getContentType(), endpoint.get(0).getValue(), query.get(0).getValue()); break; case NAMESPACES.CSVW + "Table": // CSVW @@ -100,56 +132,98 @@ public Access getAccess(Term logicalSource, QuadStore rmlStore) { String value = urls.get(0).getValue(); if (isRemoteFile(value)) { - access = new RemoteFileAccess(value); + access = new RemoteFileAccess(value, "text/csvw"); } else { - access = new LocalFileAccess(value, this.basePath); + access = new LocalFileAccess(value, this.basePath, "text/csvw"); } + break; + case NAMESPACES.TD + "Thing": + Map> auth2 = new HashMap<>(); + auth2.put("data", new HashMap<>()); + auth2.put("info", new HashMap<>()); + + try { + Term propertyAffordance = rmlStore.getQuad(source, new NamedNode(NAMESPACES.TD + "hasPropertyAffordance"), null).getObject(); + List form = Utils.getObjectsFromQuads(rmlStore.getQuads(propertyAffordance, new NamedNode(NAMESPACES.TD + "hasForm"), null)); + List targets = Utils.getObjectsFromQuads(rmlStore.getQuads(form.get(0), new NamedNode(NAMESPACES.HCTL + "hasTarget"), null)); + List contentTypes = Utils.getObjectsFromQuads(rmlStore.getQuads(form.get(0), new NamedNode(NAMESPACES.HCTL + "forContentType"), null)); + + // TODO: determine 
which protocol is used to know which vocabulary is needed for the protocol specific part. + String target = targets.get(0).getValue(); + String contentType = contentTypes.isEmpty() ? null : contentTypes.get(0).getValue(); + + access = new WoTAccess(target, contentType, new HashMap<>(), auth2); + } catch (Exception e) { + logger.error("Cannot create WoT TD:Thing access"); + access = null; + } break; case NAMESPACES.TD + "PropertyAffordance": - HashMap headers = new HashMap(); + Map headers = new HashMap<>(); + Map> auth = new HashMap<>(); + auth.put("data", new HashMap<>()); + auth.put("info", new HashMap<>()); List form = Utils.getObjectsFromQuads(rmlStore.getQuads(source, new NamedNode(NAMESPACES.TD + "hasForm"), null)); List targets = Utils.getObjectsFromQuads(rmlStore.getQuads(form.get(0), new NamedNode(NAMESPACES.HCTL + "hasTarget"), null)); List contentTypes = Utils.getObjectsFromQuads(rmlStore.getQuads(form.get(0), new NamedNode(NAMESPACES.HCTL + "forContentType"), null)); List headerList = Utils.getObjectsFromQuads(rmlStore.getQuads(form.get(0), new NamedNode(NAMESPACES.HTV + "headers"), null)); - // Security schema & data try { Term thing = Utils.getSubjectsFromQuads(rmlStore.getQuads(null, new NamedNode(NAMESPACES.TD + "hasPropertyAffordance"), source)).get(0); List securityConfiguration = Utils.getObjectsFromQuads(rmlStore.getQuads(thing, new NamedNode(NAMESPACES.TD + "hasSecurityConfiguration"), null)); - logger.debug("Security config: " + securityConfiguration.toString()); + logger.debug("Security config: {}", Arrays.toString(securityConfiguration.toArray())); + for (Term sc : securityConfiguration) { - // TODO: support more security configurations + boolean isOAuth = !Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.RDF + "type"), + new NamedNode(NAMESPACES.WOTSEC + "OAuth2SecurityScheme"))).isEmpty(); + boolean isBearer = !Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.RDF + "type"), + new 
NamedNode(NAMESPACES.WOTSEC + "BearerSecurityScheme"))).isEmpty(); List securityIn = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.WOTSEC + "in"), null)); List securityName = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.WOTSEC + "name"), null)); List securityValue = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.IDSA + "tokenValue"), null)); - // BearerSecurityScheme - List securityScheme = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.RDF + "type"), new NamedNode(NAMESPACES.WOTSEC + "BearerSecurityScheme"))); - if (securityScheme.size() != 0) { + if (isOAuth || isBearer) { + // BearerSecurityScheme + // OAuth2 specific + if (isOAuth) { + logger.debug("OAuth2 is used"); + Term securityAuth = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.WOTSEC + "authorization"), null)).get(0); + auth.get("info").put("authorization", securityAuth.getValue()); + auth.get("info").put("name", securityName.get(0).getValue()); + + Term securityRefresh = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.IDSA + "refreshValue"), null)).get(0); + Term securityClientID = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.IDSA + "clientID"), null)).get(0); + Term securityClientSecret = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.IDSA + "clientSecret"), null)).get(0); +// Term securityGrantType = Utils.getObjectsFromQuads(rmlStore.getQuads(sc, new NamedNode(NAMESPACES.WOTSEC + "grant_type"), null)).get(0); + + auth.get("data").put("refresh", securityRefresh.getValue()); + auth.get("data").put("client_id", securityClientID.getValue()); + auth.get("data").put("client_secret", securityClientSecret.getValue()); + logger.debug("Refresh token: {}", securityRefresh.getValue()); + logger.debug("Client ID: {}", securityClientID.getValue()); + logger.debug("Client Secret: {}", 
securityClientSecret.getValue()); +// //can this not be set default? +// auth.get("data").put("grant_type", securityGrantType.getValue()); + } + // both oath and bearer Term bearerToken = new Literal("Bearer " + securityValue.get(0).getValue()); securityValue.set(0, bearerToken); } - try { - switch (securityIn.get(0).getValue()) { - case "header": { - logger.info("Applying security configuration of " + sc.getValue() + "in header"); - logger.debug("Name: " + securityName.get(0).getValue()); - logger.debug("Value: " + securityValue.get(0).getValue()); - headers.put(securityName.get(0).getValue(), securityValue.get(0).getValue()); - break; - } - case "query": - case "body": - case "cookie": - default: - throw new NotImplementedException(); + if (securityIn.get(0).getValue().equals("header")) { + logger.info("Applying security configuration of {} in header", sc.getValue()); + logger.debug("Name: {}", securityName.get(0).getValue()); + logger.debug("Value: {}", securityValue.get(0).getValue()); + headers.put(securityName.get(0).getValue(), securityValue.get(0).getValue()); + } else { + throw new NotImplementedException(); } } catch (IndexOutOfBoundsException e) { - logger.warn("Unable to apply security configuration for " + sc.getValue()); + logger.warn("Unable to apply security configuration for {}", sc.getValue()); } } + } catch (IndexOutOfBoundsException e) { logger.warn("No td:Thing description, unable to determine security configurations, assuming no security policies apply"); @@ -170,18 +244,50 @@ public Access getAccess(Term logicalSource, QuadStore rmlStore) { for(Term h: header) { String headerName = Utils.getObjectsFromQuads(rmlStore.getQuads(h, new NamedNode(NAMESPACES.HTV + "fieldName"), null)).get(0).getValue(); String headerValue = Utils.getObjectsFromQuads(rmlStore.getQuads(h, new NamedNode(NAMESPACES.HTV + "fieldValue"), null)).get(0).getValue(); - logger.debug("Retrieved HTTP header: '" + headerName + "','" + headerValue + "'"); + 
logger.debug("Retrieved HTTP header: '{}','{}'", headerName, headerValue); headers.put(headerName, headerValue); } } catch(IndexOutOfBoundsException e) { - logger.warn("Unable to retrieve header name and value for " + headerListItem.getValue()); + logger.warn("Unable to retrieve header name and value for {}", headerListItem.getValue()); + } + } + access = new WoTAccess(target, contentType, headers, auth); + break; + case NAMESPACES.DCAT + "Distribution": + List dcatUrls = Utils.getObjectsFromQuads(rmlStore.getQuads(source, new NamedNode(NAMESPACES.DCAT + "downloadURL"), null)); + + if (dcatUrls.isEmpty()) { + throw new Error("No url found for the DCAT Distribution"); + } + + String dcatValue = dcatUrls.get(0).getValue(); + if (isRemoteFile(dcatValue)) { + + List refFormulationTerms = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "referenceFormulation"), null)); + String mimetype = REF_FORM_MIMETYPE.get(refFormulationTerms.get(0).getValue()); + if (mimetype != null) { + access = new RemoteFileAccess(dcatValue, mimetype); + } else { + access = new RemoteFileAccess(dcatValue); + } + } else { + logger.debug("Local file found `{}`, trying in basePath '{}' and mapping path '{}'", dcatValue, this.basePath, this.mappingPath); + File f1 = new File(this.basePath, dcatValue); + File f2 = new File(this.mappingPath, dcatValue); + File f3 = new File(dcatValue); + if (f1.exists() || f3.exists()) { + access = new LocalFileAccess(dcatValue, this.basePath); + } else if (f2.exists()) { + access = new LocalFileAccess(dcatValue, this.mappingPath); + } + else { + throw new Error("Cannot find " + dcatValue); } - }; - access = new WoTAccess(target, contentType, headers); + } break; default: - throw new NotImplementedException(); + throw new NotImplementedException(sourceType.get(0).getValue()); } } @@ -200,9 +306,6 @@ public Access getAccess(Term logicalSource, QuadStore rmlStore) { */ private RDBAccess getRDBAccess(QuadStore rmlStore, Term 
source, Term logicalSource) { - // - Table - List tables = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RR + "tableName"), null)); - // Retrieve database information from source object // - Driver URL @@ -222,24 +325,18 @@ private RDBAccess getRDBAccess(QuadStore rmlStore, Term source, Term logicalSour } String dsn = dsnObject.get(0).getValue(); - dsn = dsn.substring(dsn.indexOf("//") + 2); - // - SQL query - String query; - List queryObject = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML + "query"), null)); + String referenceFormulation = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "referenceFormulation"), null)).get(0).getValue(); - if (queryObject.isEmpty()) { - if (tables.isEmpty()) { - // TODO better message (include Triples Map somewhere) + String query; + String iterator = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML2 + "iterator"), null)).get(0).getValue(); - throw new Error("The Logical Source does not include a SQL query nor a target table."); - } else if (tables.get(0).getValue().equals("") || tables.get(0).getValue().equals("\"\"")) { - throw new Error("The table name of a database should not be empty."); - } else { - query = "SELECT * FROM " + tables.get(0).getValue(); - } + if (referenceFormulation.equals(ReferenceFormulation.RDBTable)) { + // rml:iterator contains the table name + query = String.format("SELECT * FROM %s", iterator); } else { - query = queryObject.get(0).getValue(); + // rml:iterator contains the query itself + query = iterator; } // - Username @@ -262,16 +359,17 @@ private RDBAccess getRDBAccess(QuadStore rmlStore, Term source, Term logicalSour // - ContentType List contentType = Utils.getObjectsFromQuads(rmlStore.getQuads(logicalSource, new NamedNode(NAMESPACES.RML + "referenceFormulation"), null)); - return new RDBAccess(dsn, database, username, password, query, 
(contentType.isEmpty()? "text/csv" : contentType.get(0).getValue())); + return new RDBAccess(dsn, database, username, password, query, (contentType.isEmpty() ? "text/csv" : contentType.get(0).getValue())); } /** * This method returns a SPARQLResultFormat based on the result formats and reference formulations. * @param resultFormats the result formats used to determine the SPARQLResultFormat. - * @param referenceFormulations the reference formulations used to to determine the SPARQLResultFormat. + * @param referenceFormulations the reference formulations used to determine the SPARQLResultFormat. * @return a SPARQLResultFormat. */ private SPARQLResultFormat getSPARQLResultFormat(List resultFormats, List referenceFormulations) { + logger.debug("Getting SPARQL result format for result format '{}' and reference formulations '{}'", resultFormats.toString(), referenceFormulations.toString()); if (resultFormats.isEmpty() && referenceFormulations.isEmpty()) { // This will never be called atm but may come in handy later throw new Error("Please specify the sd:resultFormat of the SPARQL endpoint or a rml:referenceFormulation."); } else if (referenceFormulations.isEmpty()) { @@ -296,9 +394,9 @@ private SPARQLResultFormat getSPARQLResultFormat(List resultFormats, List< throw new Error("Unsupported rml:referenceFormulation for a SPARQL source."); } else { for (SPARQLResultFormat format : SPARQLResultFormat.values()) { - - if (resultFormats.get(0).getValue().equals(format.getUri()) - && format.getReferenceFormulations().contains(referenceFormulations.get(0).getValue())) { + logger.debug(format + " " + resultFormats.get(0).getValue().equals(format.getUri()) + " " + format.getReferenceFormulations().contains(referenceFormulations.get(0).getValue())); + logger.debug(format.getReferenceFormulations().toString()); + if (resultFormats.get(0).getValue().equals(format.getUri())) { return format; } } diff --git a/src/main/java/be/ugent/rml/access/COMPRESSION.java 
b/src/main/java/be/ugent/rml/access/COMPRESSION.java deleted file mode 100644 index 4a91d055..00000000 --- a/src/main/java/be/ugent/rml/access/COMPRESSION.java +++ /dev/null @@ -1,5 +0,0 @@ -package be.ugent.rml.access; - -public class COMPRESSION { - public static final String GZIP = "gzip"; -} \ No newline at end of file diff --git a/src/main/java/be/ugent/rml/access/DatabaseType.java b/src/main/java/be/ugent/rml/access/DatabaseType.java deleted file mode 100644 index 189c274b..00000000 --- a/src/main/java/be/ugent/rml/access/DatabaseType.java +++ /dev/null @@ -1,79 +0,0 @@ -package be.ugent.rml.access; - -import java.util.Arrays; -import java.util.List; - -/* - NOTE: The Oracle driver has to be installed manually, because it's not on Maven due to licensing. - */ -public enum DatabaseType { - - MYSQL("MySQL", - "mysql:", - "mysql", - "com.mysql.cj.jdbc.Driver"), - POSTGRES("PostgreSQL", - "postgresql:", - "postgres", - "org.postgresql.Driver"), - SQL_SERVER("Microsoft SQL Server", - "sqlserver:", - "sqlserver", - "com.microsoft.sqlserver.jdbc.SQLServerDriver"), - ORACLE("Oracle", - "oracle:thin:@", - "oracle", - "oracle.jdbc.OracleDriver"), - DB2("IBM DB2", - "as400:", - "ibm", - "com.ibm.as400.access.AS400JDBCDriver"); - - private final String name; - private final String jdbcPrefix; - private final String driverSubstring; - private final String driver; - - private DatabaseType(String name, String jdbcPrefix, String driverSubstring, String driver) { - this.name = name; - this.jdbcPrefix = jdbcPrefix; - this.driverSubstring = driverSubstring; - this.driver = driver; - } - - public String toString() { - return this.name; - } - - public String getJDBCPrefix() { - return this.jdbcPrefix; - } - - public String getDriverSubstring() { - return this.driverSubstring; - } - - public String getDriver() { - return this.driver; - } - - /* - Retrieves the Database enum type from a given (driver) string - */ - public static DatabaseType getDBtype(String db) { - String dbLower 
= db.toLowerCase(); - List dbs = Arrays.asList(DatabaseType.values()); - - int i = 0; - - while (i < dbs.size() && !dbLower.contains(dbs.get(i).getDriverSubstring())) { - i++; - } - - if (i < dbs.size()) { - return dbs.get(i); - } else { - throw new Error("Couldn't find a driver for the given DB: " + db); - } - } -} diff --git a/src/main/java/be/ugent/rml/access/LocalFileAccess.java b/src/main/java/be/ugent/rml/access/LocalFileAccess.java deleted file mode 100644 index f64e9007..00000000 --- a/src/main/java/be/ugent/rml/access/LocalFileAccess.java +++ /dev/null @@ -1,98 +0,0 @@ -package be.ugent.rml.access; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.Map; - -import static be.ugent.rml.Utils.getHashOfString; -import static be.ugent.rml.Utils.getInputStreamFromFile; -import static org.apache.commons.io.FileUtils.getFile; -import static org.apache.commons.io.FilenameUtils.getExtension; - -/** - * This class represents access to a local file. - */ -public class LocalFileAccess implements Access { - - private String path; - private String basePath; - - /** - * This constructor takes the path and the base path of a file. - * @param path the relative path of the file. - * @param basePath the used base path. - */ - public LocalFileAccess(String path, String basePath) { - this.path = path; - this.basePath = basePath; - } - - /** - * This method returns the InputStream of the local file. - * @return an InputStream. - * @throws IOException - */ - @Override - public InputStream getInputStream() throws IOException { - File file = new File(this.path); - - if (!file.isAbsolute()) { - file = getFile(this.basePath, this.path); - } - - return getInputStreamFromFile(file); - } - - - /** - * This methods returns the datatypes of the file. - * This method always returns null, because the datatypes can't be determined from a local file for the moment. - * @return the datatypes of the file. 
- */ - @Override - public Map getDataTypes() { - return null; - } - - @Override - public boolean equals(Object o) { - if (o instanceof LocalFileAccess) { - LocalFileAccess access = (LocalFileAccess) o; - return path.equals(access.getPath()) && basePath.equals(access.getBasePath()); - } else { - return false; - } - } - - @Override - public int hashCode() { - return getHashOfString(getPath() + getBasePath()); - } - - /** - * This method returns the path of the access. - * @return the relative path. - */ - public String getPath() { - return path; - } - - /** - * This method returns the base path of the access. - * @return the base path. - */ - public String getBasePath() { - return basePath; - } - - @Override - public String toString() { - return this.path; - } - - @Override - public String getContentType() { - return getExtension(this.path); - } -} diff --git a/src/main/java/be/ugent/rml/access/RDBAccess.java b/src/main/java/be/ugent/rml/access/RDBAccess.java deleted file mode 100644 index 4af8d351..00000000 --- a/src/main/java/be/ugent/rml/access/RDBAccess.java +++ /dev/null @@ -1,447 +0,0 @@ -package be.ugent.rml.access; - -import be.ugent.rml.NAMESPACES; -import com.opencsv.CSVWriter; -import org.w3c.dom.Document; -import org.w3c.dom.Element; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; -import java.sql.*; -import java.util.HashMap; -import java.util.Map; - -import static be.ugent.rml.Utils.getHashOfString; - -/** - * This class represents the access to a relational database. 
- */ -public class RDBAccess implements Access { - - private String dsn; - private DatabaseType databaseType; - private String username; - private String password; - private String query; - private String contentType; - private Map datatypes = new HashMap<>(); - - // Datatype definitions - private final static String DOUBLE = "http://www.w3.org/2001/XMLSchema#double"; - private final static String VARBINARY = "http://www.w3.org/2001/XMLSchema#hexBinary"; - private final static String DECIMAL = "http://www.w3.org/2001/XMLSchema#decimal"; - private final static String INTEGER = "http://www.w3.org/2001/XMLSchema#integer"; - private final static String BOOLEAN = "http://www.w3.org/2001/XMLSchema#boolean"; - private final static String DATE = "http://www.w3.org/2001/XMLSchema#date"; - private final static String TIME = "http://www.w3.org/2001/XMLSchema#time"; - private final static String DATETIME = "http://www.w3.org/2001/XMLSchema#dateTime"; - - - /** - * This constructor takes as arguments the dsn, database, username, password, query, and content type. - * - * @param dsn the data source name. - * @param databaseType the database type. - * @param username the username of the user that executes the query. - * @param password the password of the above user. - * @param query the SQL query to use. - * @param contentType the content type of the results. - */ - public RDBAccess(String dsn, DatabaseType databaseType, String username, String password, String query, String contentType) { - this.dsn = dsn; - this.databaseType = databaseType; - this.username = username; - this.password = password; - this.query = query; - this.contentType = contentType; - } - - /** - * This method returns an InputStream of the results of the SQL query. - * - * @return an InputStream with the results. 
- * @throws IOException - */ - @Override - public InputStream getInputStream() throws IOException, SQLException, ClassNotFoundException { - // JDBC objects - Connection connection = null; - Statement statement = null; - String jdbcDriver = databaseType.getDriver(); - String jdbcDSN = "jdbc:" + databaseType.getJDBCPrefix() + "//" + dsn; - InputStream inputStream = null; - - try { - // Register JDBC driver - Class.forName(jdbcDriver); - - // Open connection - String connectionString = jdbcDSN; - boolean alreadySomeQueryParametersPresent = false; - - if (username != null && !username.equals("") && password != null && !password.equals("")) { - if (databaseType == DatabaseType.ORACLE) { - connectionString = connectionString.replace(":@", ":" + username + "/" + password + "@"); - } else if (!connectionString.contains("user=")) { - connectionString += "?user=" + username + "&password=" + password; - alreadySomeQueryParametersPresent = true; - } - } - - if (databaseType == DatabaseType.MYSQL) { - if (alreadySomeQueryParametersPresent) { - connectionString += "&"; - } else { - connectionString += "?"; - } - - connectionString += "serverTimezone=UTC&useSSL=false"; - } - - if (databaseType == DatabaseType.SQL_SERVER) { - connectionString = connectionString.replaceAll("\\?|&", ";"); - - if (!connectionString.endsWith(";")) { - connectionString += ";"; - } - } - connection = DriverManager.getConnection(connectionString); - - // Execute query - statement = connection.createStatement(); - ResultSet rs = statement.executeQuery(query); - - switch (contentType) { - case NAMESPACES.QL + "XPath" : - inputStream = getXMLInputStream(rs); - break; - default: - inputStream = getCSVInputStream(rs); - } - - - // Clean-up environment - rs.close(); - statement.close(); - connection.close(); - - } catch (Exception sqlE) { - throw sqlE; - } finally { - - // finally block used to close resources - try { - if (statement != null) { - statement.close(); - } - } catch (SQLException se2) { - }// 
nothing we can do - - try { - if (connection != null) { - connection.close(); - } - } catch (SQLException se) { - se.printStackTrace(); - } - } - - return inputStream; - } - - /** - * This method returns the datatypes used for the columns in the accessed database. - * - * @return a map of column names and their datatypes. - */ - @Override - public Map getDataTypes() { - return datatypes; - } - - /** - * This method creates an CSV-formatted InputStream from a Result Set. - * - * @param rs the Result Set that is used. - * @return a CSV-formatted InputStream. - * @throws SQLException - */ - private InputStream getCSVInputStream(ResultSet rs) throws SQLException { - // Get number of requested columns - ResultSetMetaData rsmd = rs.getMetaData(); - int columnCount = rsmd.getColumnCount(); - - boolean filledInDataTypes = false; - StringWriter writer = new StringWriter(); - - try { - // Differentiate null and "" - CSVWriter csvWriter = new CSVWriter(writer); - csvWriter.writeNext(getCSVHeader(rsmd, columnCount)); - - // Extract data from result set - while (rs.next()) { - String[] csvRow = new String[columnCount]; - - // Iterate over column names - for (int i = 1; i <= columnCount; i++) { - String columnName = rsmd.getColumnLabel(i); - String dataType = getColumnDataType(rsmd.getColumnTypeName(i)); - - // Register datatype during first encounter - if (!filledInDataTypes) { - if (dataType != null) { - datatypes.put(columnName, dataType); - } - } - - // Normalize value and add value to CSV row. - if (VARBINARY.equals(dataType)) { - byte[] data = rs.getBytes(columnName); - csvRow[i - 1] = bytesToHexString(data); - } else { - String data = rs.getString(columnName); - csvRow[i - 1] = normalizeData(data, dataType); - } - } - - // Add CSV row to CSVPrinter. - // non-varargs call - csvWriter.writeNext(csvRow); - filledInDataTypes = true; - } - csvWriter.close(); - } catch (IOException e) { - e.printStackTrace(); - } - - // Get InputStream from StringWriter. 
- return new ByteArrayInputStream(writer.toString().getBytes()); - } - - private InputStream getXMLInputStream(ResultSet rs) throws SQLException { - // Get number of requested columns - ResultSetMetaData rsmd = rs.getMetaData(); - int columnCount = rsmd.getColumnCount(); - - StringWriter writer = new StringWriter(); - - // Create document - DocumentBuilder builder; - try { - builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - Document doc = builder.newDocument(); - - Element rootElement = doc.createElement("Results"); - doc.appendChild(rootElement); - // Extract data from result set - while (rs.next()) { - Element row = doc.createElement("row"); - rootElement.appendChild(row); - - // Iterate over column names - for (int i = 1; i <= columnCount; i++) { - Element el = doc.createElement(rsmd.getColumnName(i)); - el.appendChild(doc.createTextNode(rs.getObject(i).toString())); - row.appendChild(el); - } - } - - TransformerFactory transformerFactory = TransformerFactory.newInstance(); - Transformer transformer = transformerFactory.newTransformer(); - transformer.setOutputProperty(OutputKeys.INDENT, "yes"); - transformer.setOutputProperty(OutputKeys.METHOD, "xml"); - - transformer.transform(new DOMSource(doc), new StreamResult(writer)); - } catch (Exception e) { - e.printStackTrace(); - } - - // Get InputStream from StringWriter. - return new ByteArrayInputStream(writer.toString().getBytes()); - - } - - - /** - * This method returns the corresponding datatype for a SQL datatype. - * - * @param type the SQL datatype. - * @return the url of the corresponding datatype. 
- */ - private String getColumnDataType(String type) { - switch (type.toUpperCase()) { - case "BYTEA": - case "BINARY": - case "BINARY VARYING": - case "BINARY LARGE OBJECT": - case "VARBINARY": - return VARBINARY; - case "NUMERIC": - case "DECIMAL": - return DECIMAL; - case "SMALLINT": - case "INT": - case "INT4": - case "INT8": - case "INTEGER": - case "BIGINT": - return INTEGER; - case "FLOAT": - case "FLOAT4": - case "FLOAT8": - case "REAL": - case "DOUBLE": - case "DOUBLE PRECISION": - return DOUBLE; - case "BIT": - case "BOOL": - case "BOOLEAN": - return BOOLEAN; - case "DATE": - return DATE; - case "TIME": - return TIME; - case "TIMESTAMP": - case "DATETIME": - return DATETIME; - } - return null; - } - - /** - * This method returns the header of the CSV. - * - * @param rsmd metdata of the Result Set - * @param columnCount the number of columns. - * @return a String array with the headers. - * @throws SQLException - */ - private String[] getCSVHeader(final ResultSetMetaData rsmd, final int columnCount) throws SQLException { - String[] headers = new String[columnCount]; - - for (int i = 1; i <= columnCount; i++) { - headers[i - 1] = rsmd.getColumnLabel(i); - // Setting the empty header label at be.ugent.rml.access.RDBAccess.nullheader (as otherwise CSV parsers might fail), - // (this header cannot be used by actual mapping files so this should actually not give any issues) - // and hope that this header will NEVER be encountered in real-world tables - if (headers[i - 1] == null || headers[i - 1].equals("")) { - headers[i - 1] = "be.ugent.rml.access.RDBAccess.nullheader"; - } - } - - return headers; - } - - /** - * Convert a sequence of bytes to a string representation using uppercase hex symbols - * @param bytes the bytes to convert - * @return a string containing the hexadecimal representation of the byte array - */ - private static String bytesToHexString(byte[] bytes) { - StringBuilder builder = new StringBuilder(); - for (byte b : bytes) { - // format: 0 
flag for zero-padding, 2 character width, uppercase hexadecimal symbols - builder.append(String.format("%02X", b)); - } - return builder.toString(); - } - - /** - * Normalize the string representation of a data value given by the RDB. - * @param data the string representation retrieved from the RDB of the data to be normalized. - * @param dataType the intended datatype of the data parameter. - * @return Normalized string representation of the data parameter, given the datatype. - */ - private static String normalizeData(String data, String dataType) { - if (DOUBLE.equals(dataType)) { - // remove trailing decimal points (Quirk from MySQL, see issue 203) - return data.replace(".0", ""); - } - return data; - } - - @Override - public boolean equals(Object o) { - if (o instanceof RDBAccess) { - RDBAccess access = (RDBAccess) o; - - return dsn.equals(access.getDSN()) - && databaseType.equals(access.getDatabaseType()) - && username.equals(access.getUsername()) - && password.equals(access.getPassword()) - && query.equals(access.getQuery()) - && contentType.equals(access.getContentType()); - } else { - return false; - } - } - - @Override - public int hashCode() { - return getHashOfString(getDSN() + getDatabaseType() + getUsername() + getPassword() + getQuery() + getContentType()); - } - - /** - * This method returns the DNS. - * - * @return the DNS. - */ - public String getDSN() { - return dsn; - } - - /** - * This method returns the database type. - * - * @return the database type. - */ - public DatabaseType getDatabaseType() { - return databaseType; - } - - /** - * This method returns the username. - * - * @return the username. - */ - public String getUsername() { - return username; - } - - /** - * This method returns the password. - * - * @return the password. - */ - public String getPassword() { - return password; - } - - /** - * This method returns the SQL query. - * - * @return the SQL query. 
- */ - public String getQuery() { - return query; - } - - /** - * This method returns the content type. - * - * @return the content type. - */ - public String getContentType() { - return contentType; - } -} diff --git a/src/main/java/be/ugent/rml/access/RemoteFileAccess.java b/src/main/java/be/ugent/rml/access/RemoteFileAccess.java deleted file mode 100644 index 49666b2e..00000000 --- a/src/main/java/be/ugent/rml/access/RemoteFileAccess.java +++ /dev/null @@ -1,78 +0,0 @@ -package be.ugent.rml.access; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.util.Map; - -import static be.ugent.rml.Utils.getHashOfString; -import static be.ugent.rml.Utils.getInputStreamFromURL; - -/** - * This class represents access to a remote file. - */ -public class RemoteFileAccess implements Access { - - private String location; - private String contentType; - - public RemoteFileAccess(String location) { - this(location, ""); - } - - /** - * This constructor of RemoteFileAccess taking location and content type as arguments. - * @param location the location of the remote file. - * @param contentType the content type of the remote file. - */ - public RemoteFileAccess(String location, String contentType) { - this.location = location; - this.contentType = contentType; - } - - @Override - public InputStream getInputStream() throws IOException { - return getInputStreamFromURL(new URL(location), contentType); - } - - /** - * This methods returns the datatypes of the file. - * This method always returns null, because the datatypes can't be determined from a remote file for the moment. - * @return the datatypes of the file. 
- */ - @Override - public Map getDataTypes() { - return null; - } - - @Override - public boolean equals(Object o) { - if (o instanceof RemoteFileAccess) { - RemoteFileAccess access = (RemoteFileAccess) o; - return location.equals(access.getLocation()) && contentType.equals(access.getContentType()); - } else { - return false; - } - } - - @Override - public int hashCode() { - return getHashOfString(getLocation() + getContentType()); - } - - /** - * The method returns the location of the remote file. - * @return the location. - */ - public String getLocation() { - return location; - } - - /** - * This method returns the content type of the remote file. - * @return the content type. - */ - public String getContentType() { - return contentType; - } -} diff --git a/src/main/java/be/ugent/rml/access/SPARQLEndpointAccess.java b/src/main/java/be/ugent/rml/access/SPARQLEndpointAccess.java deleted file mode 100644 index ffd2f216..00000000 --- a/src/main/java/be/ugent/rml/access/SPARQLEndpointAccess.java +++ /dev/null @@ -1,125 +0,0 @@ -package be.ugent.rml.access; - -import be.ugent.rml.Utils; -import org.apache.jena.base.Sys; - -import java.io.*; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.HashMap; -import java.util.Map; - -import static be.ugent.rml.Utils.getHashOfString; - -/** - * This class represents the access to a SPARQL endpoint. - */ -public class SPARQLEndpointAccess implements Access { - - private String contentType; - private String endpoint; - private String query; - - /** - * This constructor takes a content type, url of the endpoint, and a SPARQL query as arguments. - * @param contentType the content type of the results. - * @param endpoint the url of the SPARQL endpoint. - * @param query the SPARQL query used on the endpoint. 
- */ - public SPARQLEndpointAccess(String contentType, String endpoint, String query) { - this.contentType = contentType; - this.endpoint = endpoint; - this.query = query; - } - - /** - * This method returns an InputStream of the results of the SPARQL endpoint. - * @return an InputStream. - * @throws IOException - */ - @Override - public InputStream getInputStream() throws IOException { - // Query the endpoint - URL url = new URL(endpoint); - HttpURLConnection connection = (HttpURLConnection) url.openConnection(); - connection.setRequestMethod("GET"); - - // Set 'Accept' header - connection.setRequestProperty("Accept", contentType); - - // Set 'query' parameter - Map urlParams = new HashMap() {{ - put("query", query); - }}; - - connection.setDoOutput(true); - DataOutputStream out = new DataOutputStream(connection.getOutputStream()); - out.writeBytes(Utils.getURLParamsString(urlParams)); - out.flush(); - out.close(); - - // TODO check this code - int status = connection.getResponseCode(); - - return connection.getInputStream(); - } - - @Override - public boolean equals(Object o) { - if (o instanceof SPARQLEndpointAccess) { - SPARQLEndpointAccess access = (SPARQLEndpointAccess) o; - return endpoint.equals(access.getEndpoint()) && contentType.equals(access.getContentType()) && query.equals(access.getQuery()); - } else { - return false; - } - } - - @Override - public int hashCode() { - return getHashOfString(getEndpoint() + getQuery() + getContentType()); - } - - /** - * This methods returns the datatypes of the results of the SPARQL query. - * This method always returns null at the moment. - * @return the datatypes of the results of the SPARQL query. - */ - @Override - public Map getDataTypes() { - return null; - } - - /** - * This method returns the content type of the results. - * @return the content type of the results. - */ - public String getContentType() { - return contentType; - } - - /** - * This method returns the url of the endpoint. 
- * @return the url of the endpoint. - */ - public String getEndpoint() { - return endpoint; - } - - /** - * This method returns the SPARQL query that is used to get the results. - * @return the SPARQL query that is used to get the results. - */ - public String getQuery() { - return query; - } - - /** - * Clean a SPARQLQuery by removing whitespaces or comments - * @param query The SPARQL query - * @return The cleaned query - */ - public static String cleanQuery(String query){ - // Original, naive implementation that could lead to malformed queries - return query.replaceAll("[\r\n]+", " ").trim(); - } -} diff --git a/src/main/java/be/ugent/rml/access/WoTAccess.java b/src/main/java/be/ugent/rml/access/WoTAccess.java deleted file mode 100644 index b840459c..00000000 --- a/src/main/java/be/ugent/rml/access/WoTAccess.java +++ /dev/null @@ -1,83 +0,0 @@ -package be.ugent.rml.access; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.util.HashMap; -import java.util.Map; - -import static be.ugent.rml.Utils.getHashOfString; -import static be.ugent.rml.Utils.getInputStreamFromURL; - -public class WoTAccess implements Access { - - private static final Logger logger = LoggerFactory.getLogger(WoTAccess.class); - private String location; - private String contentType; - private HashMap headers; - - /** - * This constructor of WoTAccess taking location and content type as arguments. - * @param location the location of the WoT Thing. - * @param contentType the content type of the WoT Thing. 
- */ - public WoTAccess (String location, String contentType, HashMap headers) { - this.location = location; - this.contentType = contentType; - this.headers = headers; - logger.debug("Created WoTAccess:\n\tlocation:" + this.location + "\n\tcontent-type:" + this.contentType); - logger.debug(headers.toString()); - headers.forEach((name, value) -> { - logger.debug("Header: " + name + ": " + value); - }); - } - - @Override - public InputStream getInputStream() throws IOException { - return getInputStreamFromURL(new URL(location), contentType, headers); - } - - /** - * This methods returns the datatypes of the WoT Thing. - * This method always returns null, because the datatypes can't be determined from a WoT Thing for the moment. - * @return the datatypes of the file. - */ - @Override - public Map getDataTypes() { - return null; - } - - @Override - public boolean equals(Object o) { - if (o instanceof WoTAccess) { - WoTAccess access = (WoTAccess) o; - return location.equals(access.getLocation()) && contentType.equals(access.getContentType()); - } else { - return false; - } - } - - @Override - public int hashCode() { - return getHashOfString(getLocation() + getContentType()); - } - - /** - * The method returns the location of the remote file. - * @return the location. - */ - public String getLocation() { - return location; - } - - /** - * This method returns the content type of the remote file. - * @return the content type. 
- */ - public String getContentType() { - return contentType; - } -} \ No newline at end of file diff --git a/src/main/java/be/ugent/rml/cli/Main.java b/src/main/java/be/ugent/rml/cli/Main.java index c650ff6a..7faf67c7 100644 --- a/src/main/java/be/ugent/rml/cli/Main.java +++ b/src/main/java/be/ugent/rml/cli/Main.java @@ -1,23 +1,24 @@ package be.ugent.rml.cli; +import be.ugent.idlab.knows.functions.agent.Agent; +import be.ugent.idlab.knows.functions.agent.AgentFactory; +import be.ugent.knows.idlabFunctions.IDLabFunctions; import be.ugent.rml.Executor; +import be.ugent.rml.NAMESPACES; import be.ugent.rml.StrictMode; import be.ugent.rml.Utils; import be.ugent.rml.conformer.MappingConformer; -import be.ugent.rml.functions.FunctionLoader; -import be.ugent.rml.functions.lib.IDLabFunctions; import be.ugent.rml.metadata.MetadataGenerator; import be.ugent.rml.records.RecordsFactory; +import be.ugent.rml.store.Quad; import be.ugent.rml.store.QuadStore; import be.ugent.rml.store.RDF4JStore; -import be.ugent.rml.store.SimpleQuadStore; import be.ugent.rml.target.Target; import be.ugent.rml.target.TargetFactory; import be.ugent.rml.term.NamedNode; import be.ugent.rml.term.Term; import ch.qos.logback.classic.Level; import org.apache.commons.cli.*; -import org.apache.commons.io.IOUtils; import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.RDFParseException; import org.slf4j.Logger; @@ -33,25 +34,39 @@ import java.util.*; import java.util.stream.Collectors; -import static be.ugent.rml.StrictMode.*; +import static be.ugent.rml.StrictMode.BEST_EFFORT; +import static be.ugent.rml.StrictMode.STRICT; public class Main { private static final Logger logger = LoggerFactory.getLogger(Main.class); private static final Marker fatal = MarkerFactory.getMarker("FATAL"); + private static final String defaultBaseIRI = "http://example.com"; + public static void main(String[] args) { - main(args, System.getProperty("user.dir")); + try { + run(args, System.getProperty("user.dir")); 
+ } catch (Exception e) { + System.out.println(e); + System.exit(1); + } + } + + public static void run(String[] args) throws Exception { + run(args, System.getProperty("user.dir")); } /** - * Main method use for the CLI. Allows to also set the current working directory via the argument basePath. + * Main method use for the CLI. Allows to also set the current working directory + * via the argument basePath. * * @param args the CLI arguments * @param basePath the basePath used during the execution. */ - public static void main(String[] args, String basePath) { + public static void run(String[] args, String basePath) throws Exception { Options options = new Options(); + boolean onlyConvertMapping = false; Option mappingdocOption = Option.builder("m") .longOpt("mappingfile") .hasArg() @@ -85,7 +100,7 @@ public static void main(String[] args, String basePath) { .build(); Option removeduplicatesOption = Option.builder("d") .longOpt("duplicates") - .desc("remove duplicates in the output") + .desc("remove duplicates in the HDT, N-Triples, or N-Quads output") .build(); Option configfileOption = Option.builder("c") .longOpt("configfile") @@ -112,7 +127,7 @@ public static void main(String[] args, String basePath) { .build(); Option serializationFormatOption = Option.builder("s") .longOpt("serialization") - .desc("serialization format (nquads (default), turtle, trig, trix, jsonld, hdt)") + .desc("serialization format (nquads (default), turtle, trig, trix, jsonld, hdt, jelly)") .hasArg() .build(); Option jdbcDSNOption = Option.builder("dsn") @@ -139,6 +154,15 @@ public static void main(String[] args, String basePath) { .desc("Base IRI used to expand relative IRIs in generated terms in the output.") .hasArg() .build(); + Option provideOwnEOFMarkerOption = Option.builder() + .longOpt("disable-automatic-eof-marker") + .desc("Setting this option assumes input data has a kind of End-of-File marker. 
" + + "Don't use unless you're absolutely sure what you're doing!") + .build(); + Option convertMapping = Option.builder() + .longOpt("convert-mapping") + .desc("Only convert the mapping to the latest RML specification by the W3C Community Group") + .build(); options.addOption(mappingdocOption); options.addOption(privateSecurityDataOption); @@ -157,6 +181,8 @@ public static void main(String[] args, String basePath) { options.addOption(usernameOption); options.addOption(strictModeOption); options.addOption(baseIriOption); + options.addOption(provideOwnEOFMarkerOption); + options.addOption(convertMapping); CommandLineParser parser = new DefaultParser(); try { @@ -167,7 +193,9 @@ public static void main(String[] args, String basePath) { Properties configFile = null; if (lineArgs.hasOption("c")) { configFile = new Properties(); - configFile.load(Utils.getReaderFromLocation(lineArgs.getOptionValue("c"))); + try (Reader reader = Utils.getReaderFromLocation(lineArgs.getOptionValue("c"))) { + configFile.load(reader); + } } if (checkOptionPresence(helpOption, lineArgs, configFile)) { @@ -181,12 +209,27 @@ public static void main(String[] args, String basePath) { setLoggerLevel(Level.ERROR); } + if (checkOptionPresence(convertMapping, lineArgs, configFile)) { + onlyConvertMapping = true; + } + String[] mOptionValue = getOptionValues(mappingdocOption, lineArgs, configFile); List lis = new ArrayList<>(); if (mOptionValue == null && System.console() != null) { printHelp(options); - System.exit(1); + throw new IllegalArgumentException("No mapping file nor via stdin found!"); + } + + String outputFile = getPriorityOptionValue(outputfileOption, lineArgs, configFile); + // If output path exists and contains 'directory-like' characters + if (outputFile != null) { + // Windows paths 🤷‍♂️ + outputFile = outputFile.replaceAll("\\\\", "/"); + if (!Utils.checkPathParent(outputFile, null)) { + logger.error(fatal, "The given output path does not exist."); + throw new 
IllegalArgumentException("The given output path does not exist."); + } } if (mOptionValue != null) { @@ -199,10 +242,30 @@ public static void main(String[] args, String basePath) { try { BufferedInputStream bis = new BufferedInputStream(System.in); int available = bis.available(); - if (bis.available() != 0) { - lis.add(bis); + if (available > 0) { + // This little hack solves Maven tests: if the console is detached + // the normal System.in could send EOT bytes to indicate that there is no + // input. + // So we check if there are other bytes than the (EOT) / End of File (EOF) bytes: 04 + byte[] firstBytes = new byte[32]; + bis.mark(32); + int bytesRead = bis.read(firstBytes); + bis.reset(); + if (bytesRead > 0) { + boolean addStream = false; + for (byte aByte : firstBytes) { + if (aByte != 0 && aByte != 4) { // 4 is the EOF / EOT byte + addStream = true; + break; + } + } + if (addStream) { + lis.add(bis); + } + } } } catch (IOException ex) { + logger.warn("Error trying to check System.in: {}", ex.getMessage()); // The inputstream is closed when read. Leads to IOExceptions for tests that don't provide their own inputstream } @@ -222,7 +285,7 @@ public static void main(String[] args, String basePath) { } catch (RDFParseException e) { logger.error(fatal, "Unable to parse mapping rules as Turtle. Does the file exist and is it valid Turtle?", e); - System.exit(1); + throw new IllegalArgumentException("Unable to parse mapping rules as Turtle. 
Does the file exist and is it valid Turtle?"); } // Private security data is optionally @@ -232,30 +295,23 @@ public static void main(String[] args, String basePath) { List lisPrivateSecurityData = Arrays.stream(mOptionValuePrivateSecurityData) .map(Utils::getInputStreamFromFileOrContentString) .collect(Collectors.toList()); - InputStream isPrivateSecurityData = new SequenceInputStream(Collections.enumeration(lisPrivateSecurityData)); - try { + + try (InputStream isPrivateSecurityData = new SequenceInputStream(Collections.enumeration(lisPrivateSecurityData))) { rmlStore.read(isPrivateSecurityData, null, RDFFormat.TURTLE); } catch (RDFParseException e) { logger.debug(e.getMessage()); logger.error(fatal, "Unable to parse private security data as Turtle. Does the file exist and is it valid Turtle?"); - System.exit(1); + throw new IllegalArgumentException("Unable to parse private security data as Turtle. Does the file exist and is it valid Turtle?"); } } - - // Convert mapping file to RML if needed. 
- MappingConformer conformer = new MappingConformer(rmlStore, mappingOptions); - + + String mappingPath = ""; try { - boolean conversionNeeded = conformer.conform(); - - if (conversionNeeded) { - logger.info("Conversion to RML was needed."); - } + mappingPath = Utils.getFile(lineArgs.getOptionValue('m')).getParent(); } catch (Exception e) { - logger.error(fatal, "Failed to make mapping file conformant to RML spec.", e); + logger.debug("Mapping path unknown as mapping file supplied via stdin"); } - - RecordsFactory factory = new RecordsFactory(basePath); + RecordsFactory factory = new RecordsFactory(basePath, mappingPath); String outputFormat = getPriorityOptionValue(serializationFormatOption, lineArgs, configFile); QuadStore outputStore = getStoreForFormat(outputFormat); @@ -299,38 +355,6 @@ public static void main(String[] args, String basePath) { } } - String[] fOptionValue = getOptionValues(functionfileOption, lineArgs, configFile); - FunctionLoader functionLoader; - - // Read function description files. 
- if (fOptionValue == null) { - functionLoader = new FunctionLoader(); - } else { - logger.debug("Using custom path to functions.ttl file: " + Arrays.toString(fOptionValue)); - RDF4JStore functionDescriptionTriples = new RDF4JStore(); - functionDescriptionTriples.read(Utils.getInputStreamFromFile(Utils.getFile("functions_idlab.ttl")), null, RDFFormat.TURTLE); - Map libraryMap = new HashMap<>(); - libraryMap.put("IDLabFunctions", IDLabFunctions.class); - List lisF = Arrays.stream(fOptionValue) - .map(Utils::getInputStreamFromFileOrContentString) - .collect(Collectors.toList()); - for (int i = 0; i < lisF.size(); i++) { - functionDescriptionTriples.read(lisF.get(i), null, RDFFormat.TURTLE); - } - functionLoader = new FunctionLoader(functionDescriptionTriples, libraryMap); - } - - if (mOptionValue != null) { - /* - * We have to get the InputStreams of the RML documents again, - * because we can only use an InputStream once - */ - lis = Arrays.stream(mOptionValue) - .map(Utils::getInputStreamFromFileOrContentString) - .collect(Collectors.toList()); - } - is = new SequenceInputStream(Collections.enumeration(lis)); - boolean strict = checkOptionPresence(strictModeOption, lineArgs, configFile); StrictMode strictMode = strict ? 
STRICT : BEST_EFFORT; @@ -341,12 +365,24 @@ public static void main(String[] args, String basePath) { if (strictMode.equals(STRICT)) { throw new Exception("When running in strict mode, a base IRI argument must be set."); } else { + if (mOptionValue != null) { + /* + * We have to get the InputStreams of the RML documents again, + * because we can only use an InputStream once + */ + lis = Arrays.stream(mOptionValue) + .map(Utils::getInputStreamFromFileOrContentString) + .collect(Collectors.toList()); + } // Best-effort mode, use the @base directive as a fallback - baseIRI = Utils.getBaseDirectiveTurtle(is); + try (InputStream is2 = new SequenceInputStream(Collections.enumeration(lis))) { + baseIRI = Utils.getBaseDirectiveTurtleOrDefault(is2, defaultBaseIRI); + } } } - executor = new Executor(rmlStore, factory, functionLoader, outputStore, baseIRI, strictMode); + String[] fOptionValue = getOptionValues(functionfileOption, lineArgs, configFile); + final Agent functionAgent; List triplesMaps = new ArrayList<>(); @@ -358,6 +394,39 @@ public static void main(String[] args, String basePath) { }); } + // Read function description files. + if (fOptionValue == null) { + // default initialisation with IDLab functions and GREL functions... 
+ functionAgent = AgentFactory.createFromFnO( + "fno/functions_idlab.ttl", "fno/functions_idlab_classes_java_mapping.ttl", + "fno_idlab_old/functions_idlab.ttl", "fno_idlab_old/functions_idlab_classes_java_mapping.ttl", + "functions_grel.ttl", + "grel_java_mapping.ttl"); + } else { + logger.debug("Using custom path to functions.ttl file: {}", Arrays.toString(fOptionValue)); + String[] optionWithIDLabFunctionArgs = new String[fOptionValue.length + 4]; + optionWithIDLabFunctionArgs[0] = "fno/functions_idlab.ttl" ; + optionWithIDLabFunctionArgs[1] = "fno/functions_idlab_classes_java_mapping.ttl" ; + optionWithIDLabFunctionArgs[2] = "fno_idlab_old/functions_idlab.ttl" ; + optionWithIDLabFunctionArgs[3] = "fno_idlab_old/functions_idlab_classes_java_mapping.ttl" ; + System.arraycopy(fOptionValue, 0, optionWithIDLabFunctionArgs, 4, fOptionValue.length); + functionAgent = AgentFactory.createFromFnO(optionWithIDLabFunctionArgs); + } + + executor = new Executor(rmlStore, factory, outputStore, baseIRI, strictMode, functionAgent, mappingOptions); + + if (onlyConvertMapping) { + logger.debug("Outputting converted mapping following the latest RML specification"); + writeOutput(rmlStore, outputFile, "turtle"); + System.exit(0); + } + + if (checkOptionPresence(provideOwnEOFMarkerOption, lineArgs, configFile)) { + logger.warn("Automatic EOF marker disabled!"); + executor.setEOFProvidedInData(); + } + + executor.verifySources(basePath, mappingPath); if (metadataGenerator != null) { metadataGenerator.preMappingGeneration(triplesMaps.isEmpty() ? 
executor.getTriplesMaps() : triplesMaps, rmlStore); @@ -365,54 +434,98 @@ public static void main(String[] args, String basePath) { // Get start timestamp for post mapping metadata String startTimestamp = Instant.now().toString(); + QuadStore result = null; try { - HashMap targets = executor.executeV5(triplesMaps, checkOptionPresence(removeduplicatesOption, lineArgs, configFile), - metadataGenerator); - QuadStore result = targets.get(new NamedNode("rmlmapper://default.store")); - - // Get stop timestamp for post mapping metadata - String stopTimestamp = Instant.now().toString(); - - // Generate post mapping metadata and output all metadata - if (metadataGenerator != null) { - metadataGenerator.postMappingGeneration(startTimestamp, stopTimestamp, - result); + Map targets = executor.execute(triplesMaps, checkOptionPresence(removeduplicatesOption, lineArgs, configFile), metadataGenerator); + } catch (Exception e) { + logger.error(e.getMessage()); + throw e; + } finally { + functionAgent.close(); + } - writeOutput(metadataGenerator.getResult(), metadataFile, outputFormat); + Map targets = executor.getTargets(); + if (targets != null) { + result = targets.get(new NamedNode("rmlmapper://default.store")); + if(result != null) { + result.copyNameSpaces(rmlStore); } - String outputFile = getPriorityOptionValue(outputfileOption, lineArgs, configFile); result.copyNameSpaces(rmlStore); - writeOutputTargets(targets, rmlStore, basePath, outputFile, outputFormat); - } catch (Exception e) { - logger.error(e.getMessage()); + IDLabFunctions.saveState(); + + writeOutputTargets(targets, rmlStore, basePath, outputFile, outputFormat, mappingPath); + + } + // Get stop timestamp for post mapping metadata + String stopTimestamp = Instant.now().toString(); + + // Generate post mapping metadata and output all metadata + if (metadataGenerator != null && targets != null) { + metadataGenerator.postMappingGeneration(startTimestamp, stopTimestamp, result); + 
writeOutput(metadataGenerator.getResult(), metadataFile, outputFormat); } } catch (ParseException exp) { // oops, something went wrong - logger.error("Parsing failed. Reason: " + exp.getMessage()); + logger.error("Parsing failed. Reason: {}", exp.getMessage()); printHelp(options); + } catch (IllegalArgumentException exp) { + throw exp; } catch (Exception e) { logger.error(e.getMessage(), e); + throw e; } } - private static void writeOutputTargets(HashMap targets, QuadStore rmlStore, String basePath, String outputFileDefault, String outputFormatDefault) throws Exception { + private static void writeOutputTargets(Map targets, QuadStore rmlStore, String basePath, String outputFileDefault, String outputFormatDefault, String mappingPath) throws Exception { boolean hasNoResults = true; - logger.debug("Writing to Targets: " + targets.keySet()); - TargetFactory targetFactory = new TargetFactory(basePath); + logger.debug("Writing to Targets: {}", targets.keySet()); + TargetFactory targetFactory = new TargetFactory(basePath, mappingPath); + + // check if anything needs to be added to the rmlstore (e.g. dynamic targets) + if (targets.containsKey(new NamedNode(NAMESPACES.RMLE + "ThisMapping"))){ + rmlStore.addQuads(targets.get(new NamedNode(NAMESPACES.RMLE + "ThisMapping")).getQuads(null, null, null)); + // The generated dynamic logical targets have been added to the RML store. + // It is possible that they are written using the old RML terminology. + // Therefore, the mapping conformer needs to conform the RML store again. 
+ MappingConformer conformer = new MappingConformer(rmlStore, null); + try { + boolean conversionNeeded = conformer.conform(); + + if (conversionNeeded) { + logger.info("Conversion to RML was needed."); + } + } catch (Exception e) { + logger.error("Failed to make dynamic targets conformant to RML spec.", e); + } + targets.remove(new NamedNode(NAMESPACES.RMLE + "ThisMapping")); + } // Go over each term and export to the Target if needed for (Map.Entry termTargetMapping: targets.entrySet()) { Term term = termTargetMapping.getKey(); QuadStore store = termTargetMapping.getValue(); - if (store.size() > 0) { + if (!store.isEmpty()) { hasNoResults = false; - logger.info("Target: " + term + " has " + store.size() + " results"); + logger.info("Target: {} has {} results", term, store.size()); + } + + /* Remove magic marker from output */ + List quads = store.getQuads(null, null, null, null); + for (Quad q: quads) { + String subject = q.getSubject().toString(); + String object = q.getObject().toString(); + if (subject.contains(IDLabFunctions.MAGIC_MARKER_ENCODED) + || subject.contains(IDLabFunctions.MAGIC_MARKER) + || object.contains(IDLabFunctions.MAGIC_MARKER_ENCODED) + || object.contains(IDLabFunctions.MAGIC_MARKER) ) { + store.removeQuads(q.getSubject(), q.getPredicate(), q.getObject(), q.getGraph()); + } } // Default target is exported separately for backwards compatibility reasons @@ -421,26 +534,21 @@ private static void writeOutputTargets(HashMap targets, QuadSto writeOutput(store, outputFileDefault, outputFormatDefault); } else { - logger.debug("Exporting to Target: " + term); + logger.debug("Exporting to Target: {}", term); + Target target = targetFactory.getTarget(term, rmlStore, store); if (store.size() > 1) { - logger.info(store.size() + " quads were generated for " + term + " Target"); + logger.info("{} quads were generated for {} Target", store.size(), target); } else { - logger.info(store.size() + " quad was generated " + term + " Target"); + logger.info("{} 
quad was generated {} Target", store.size(), target); } - Target target = targetFactory.getTarget(term, rmlStore); String serializationFormat = target.getSerializationFormat(); OutputStream output = target.getOutputStream(); - - // Set character encoding - Writer out = new BufferedWriter(new OutputStreamWriter(output, Charset.defaultCharset())); - - // Write store to target - store.write(out, serializationFormat); - + store.addQuads(target.getMetadata()); + store.write(output, serializationFormat); // Close OS resources - out.close(); target.close(); + logger.debug("Exporting to Target: {}", target); } } @@ -519,19 +627,20 @@ private static File writeOutputUncompressed(QuadStore store, String outputFile, File targetFile = null; if (store.size() > 1) { - logger.info(store.size() + " quads were generated for default Target"); + logger.info("{} quads were generated for default Target", store.size()); } else { - logger.info(store.size() + " quad was generated for default Target"); + logger.info("{} quad was generated for default Target", store.size()); } + OutputStream out = null; try { - Writer out; - String doneMessage = null; + String doneMessage = null; + boolean isSystemOut = false; //if output file provided, write to triples output file if (outputFile != null) { targetFile = new File(outputFile); - logger.info("Writing quads to " + targetFile.getPath() + "..."); + logger.info("Writing quads to {}...", targetFile.getPath()); if (!targetFile.isAbsolute()) { targetFile = new File(System.getProperty("user.dir") + "/" + outputFile); @@ -539,30 +648,39 @@ private static File writeOutputUncompressed(QuadStore store, String outputFile, doneMessage = "Writing to " + targetFile.getPath() + " is done."; - out = Files.newBufferedWriter(targetFile.toPath(), StandardCharsets.UTF_8); + out = Files.newOutputStream(targetFile.toPath()); } else { - out = new BufferedWriter(new OutputStreamWriter(System.out, StandardCharsets.UTF_8)); + isSystemOut = true; + out = System.out; } 
store.write(out, format); - out.close(); + if (isSystemOut) { + out.flush(); // flush System.out stream + out = null; // replace with null, so it won't be closed later; + } if (doneMessage != null) { logger.info(doneMessage); } } catch (Exception e) { - System.err.println("Writing output failed. Reason: " + e.getMessage()); + logger.error("Writing output failed. Reason: " + e.getMessage()); + } finally { + if (out != null) { + try { + out.close(); + } catch (IOException e) { + logger.error("Could not close writer. ", e); + } + } + } return targetFile; } private static QuadStore getStoreForFormat(String outputFormat) { - if (outputFormat == null || outputFormat.equals("nquads") || outputFormat.equals("hdt")) { - return new SimpleQuadStore(); - } else { - return new RDF4JStore(); - } + return new RDF4JStore(); } } diff --git a/src/main/java/be/ugent/rml/conformer/Converter.java b/src/main/java/be/ugent/rml/conformer/Converter.java index f31f49bc..a886a197 100644 --- a/src/main/java/be/ugent/rml/conformer/Converter.java +++ b/src/main/java/be/ugent/rml/conformer/Converter.java @@ -1,7 +1,5 @@ package be.ugent.rml.conformer; -import be.ugent.rml.term.Term; - import java.util.Map; /** @@ -10,16 +8,8 @@ */ interface Converter { /** - * Detection logic needed to determine mapping format of TriplesMap. - * @param tm TriplesMap - * @return true if is specific mapping format false if not - */ - boolean detect(Term tm); - - /** - * Try to convert mapping language TriplesMap to RML. + * Try to convert R2RML and old RML to new RML. * Has to be atomic. Original model must be recovered if conversion fails. 
- * @param tm TriplesMap */ - void convert(Term tm, Map mappingOptions) throws Exception; + void convert(Map mappingOptions) throws Exception; } diff --git a/src/main/java/be/ugent/rml/conformer/MappingConformer.java b/src/main/java/be/ugent/rml/conformer/MappingConformer.java index 63ba77f1..aaa8a442 100644 --- a/src/main/java/be/ugent/rml/conformer/MappingConformer.java +++ b/src/main/java/be/ugent/rml/conformer/MappingConformer.java @@ -5,8 +5,7 @@ import be.ugent.rml.term.NamedNode; import be.ugent.rml.term.Term; -import java.io.*; -import java.util.ArrayList; +import java.io.FileNotFoundException; import java.util.List; import java.util.Map; @@ -14,21 +13,27 @@ /** * Only validates by checking for at least one TriplesMap. - * Converts mapping files to RML. Currently only R2RML converter is implemented. + * Converts mapping files to W3C's Knowledge Graph Community Group RML from RML and R2RML. * InputStream of mapping file is used to create a store. TriplesMaps in store that need conversion * are detected by applying the converters detection methods and saved. convert tries to convert these - * to RML. Exceptions can be raised during validation and conversion, which the caller has to handle. - * Output of detect() informs if convert() should be used to convert to valid RML. + * to W3C RML. Exceptions can be raised during validation and conversion, which the caller has to handle. + * Output of detect() informs if convert() should be used to convert to valid W3C RML. * The validated RML can be returned as a QuadStore with getStore(). */ public class MappingConformer { + public enum Dialect { + RML, // Old RML + R2RML, // W3C's R2RML + RML2 // W3C's Knowledge Graph Construction Community Group RML + } + private QuadStore store; - private List unconvertedTriplesMaps = new ArrayList<>(); private Map mappingOptions; /** * Create MappingConformer from InputStream of mapping file in RDF. + * * @param store A QuadStore with the mapping rules. 
* @throws FileNotFoundException */ @@ -38,6 +43,7 @@ public MappingConformer(QuadStore store) throws Exception { /** * Create MappingConformer from InputStream of mapping file in RDF. + * * @param store A QuadStore with the mapping rules. * @throws FileNotFoundException */ @@ -48,65 +54,45 @@ public MappingConformer(QuadStore store, Map mappingOptions) thr /** * This method makes the QuadStore conformant to the RML spec. + * * @return True if the store had to be updated, else false. * @throws Exception if something goes wrong during detection or conversion. */ public boolean conform() throws Exception { - boolean conversionNeeded = this.detect(); - - if (conversionNeeded) { - this.convert(); - } - - return conversionNeeded; + this.detect(); + return false; } /** - * Detect if mapping file is valid RML. - * @return true if valid RML, false if conversion is needed + * Detect if mapping file is valid W3C RML. + * + * @return Dialect of the mapping file. Null if invalid. * @throws Exception if invalid or unconvertable */ - private boolean detect() throws Exception { - // TODO generalise for multiple converters - Converter converter = new R2RMLConverter(store); - - List triplesMaps = Utils.getSubjectsFromQuads(store - .getQuads( - null, - new NamedNode(RDF + "type"), - new NamedNode(RR + "TriplesMap"))); + private void detect() throws Exception { + // convert rml + RMLConverterNew converter = new RMLConverterNew(store); + converter.convert(mappingOptions); + // Check if we have a valid TriplesMap. + List triplesMaps = Utils.getSubjectsFromQuads(store.getQuads(null, new NamedNode(RML2 + "logicalSource"), null)); if (triplesMaps.isEmpty()) { throw new Exception("Mapping requires at least one TriplesMap"); } - // Find all triples maps - // This could be more efficient with a while loop, - // but these TriplesMaps are needed in any case when calling convert(). 
- for (Term triplesMap : triplesMaps) { - if (converter.detect(triplesMap)) { - unconvertedTriplesMaps.add(triplesMap); + // Triples Maps need a subject Map + List triplesMaps2 = Utils.getSubjectsFromQuads(store.getQuads(null, null, new NamedNode(RML2 + "TriplesMap"))); + for (Term triplesMap : triplesMaps2) { + if (!store.contains(triplesMap, new NamedNode(RML2 + "subjectMap"), null)) { + throw new Exception("TriplesMap requires a subject map"); } } - - return ! unconvertedTriplesMaps.isEmpty(); } - /** - * Tries to convert to RML. Model should still be valid on failure - * @throws Exception conversion failed - */ - private void convert() throws Exception { - // TODO generalise for multiple converters - Converter converter = new R2RMLConverter(store); - - for (Term unconvertedTriplesMap : unconvertedTriplesMaps) { - converter.convert(unconvertedTriplesMap, mappingOptions); - } - } /** * Debugging helper function to check difference of models + * * @param store QuadStore which subtracts * @return boolean this.store isSubset of given store */ @@ -116,6 +102,7 @@ boolean differenceInConformer(QuadStore store) { /** * Debugging helper function to check difference of models + * * @param store QuadStore which subtracts * @return boolean given store isSubset of store */ @@ -125,6 +112,7 @@ boolean differenceInGivenStore(QuadStore store) { /** * Get a valid QuadStore + * * @return a valid QuadStore */ public QuadStore getStore() { diff --git a/src/main/java/be/ugent/rml/conformer/R2RMLConverter.java b/src/main/java/be/ugent/rml/conformer/R2RMLConverter.java index 5449c06e..3b6b0ab3 100644 --- a/src/main/java/be/ugent/rml/conformer/R2RMLConverter.java +++ b/src/main/java/be/ugent/rml/conformer/R2RMLConverter.java @@ -1,7 +1,8 @@ package be.ugent.rml.conformer; +import be.ugent.idlab.knows.dataio.access.DatabaseType; import be.ugent.rml.NAMESPACES; -import be.ugent.rml.access.DatabaseType; +import be.ugent.rml.Utils; import be.ugent.rml.store.QuadStore; import 
be.ugent.rml.term.Literal; @@ -26,16 +27,6 @@ public class R2RMLConverter implements Converter { this.store = store; } - /** - * TriplesMap is R2RML if RR:logicalTable property is found - * - * @param triplesMap - * @return true if triplesMap is R2RML (tripleMap contains a rr:logicalTable) - */ - public boolean detect(Term triplesMap) { - return store.contains(triplesMap, new NamedNode(RR + "logicalTable"), null); - } - /** * Tries to convert R2RML TriplesMap to rml by: * - renaming logicalTable to logicalSource @@ -48,66 +39,62 @@ public boolean detect(Term triplesMap) { * * @param triplesMap rr:TriplesMap */ - public void convert(Term triplesMap, Map mappingOptions) throws Exception { - // UNSAFE store changes not yet allowed; check if all required properties are present - Term logicalTable; - - // Get logical table - try { - logicalTable = store - .getQuad(triplesMap, new NamedNode(RR + "logicalTable"), null) - .getObject(); - } catch (Exception e) { - // Also not R2RML - throw new UnsupportedOperationException("Mapping is either RML without logicalSource or R2RML without logicalTable"); - } - - Term database; + public void convert(Map mappingOptions) throws Exception { + for (Term triplesMap: Utils.getSubjectsFromQuads(store.getQuads(null, new NamedNode(RML2 + "subjectMap"), null))) { + // UNSAFE store changes not yet allowed; check if all required properties are present + Term logicalTable; + + // Get logical table + try { + logicalTable = store + .getQuad(triplesMap, new NamedNode(RR + "logicalTable"), null) + .getObject(); + } catch (Exception e) { + // Also not R2RML + throw new UnsupportedOperationException("Mapping is either RML without logicalSource or R2RML without logicalTable"); + } - // SAFE store changes allowed + Term database; -// if (! 
store.contains(null, null, new NamedNode(D2RQ + "Database"))) { - database = new NamedNode(triplesMap.getValue() + "_database"); + // SAFE store changes allowed + database = new NamedNode(triplesMap.getValue() + "_database"); - if (mappingOptions != null) { - store.addQuad(database, new NamedNode(RDF + "type"), new NamedNode(D2RQ + "Database")); + if (mappingOptions != null) { + store.addQuad(database, new NamedNode(RDF + "type"), new NamedNode(D2RQ + "Database")); - for (Map.Entry entry : mappingOptions.entrySet()) { - String removePrefix = entry.getKey(); - store.addQuad(database, new NamedNode(D2RQ + removePrefix), new Literal(entry.getValue())); + for (Map.Entry entry : mappingOptions.entrySet()) { + String removePrefix = entry.getKey(); + store.addQuad(database, new NamedNode(D2RQ + removePrefix), new Literal(entry.getValue())); - if (removePrefix.equals("jdbcDSN")) { - DatabaseType type = DatabaseType.getDBtype(entry.getValue()); - String driver = type.getDriver(); + if (removePrefix.equals("jdbcDSN")) { + DatabaseType type = DatabaseType.getDBtype(entry.getValue()); + String driver = type.getDriver(); - store.addQuad(database, new NamedNode(D2RQ + "jdbcDriver"), new Literal(driver)); + store.addQuad(database, new NamedNode(D2RQ + "jdbcDriver"), new Literal(driver)); + } } } + + // Add logical source + String logicalSourceIRI = triplesMap.getValue() + "_logicalSource"; + Term logicalSource = new NamedNode(logicalSourceIRI); + + store.addQuad(triplesMap, new NamedNode(RML + "logicalSource"), logicalSource, null); + store.addQuad(logicalSource, new NamedNode(RML + "referenceFormulation"), + new NamedNode(NAMESPACES.QL + "CSV") + ); + + // Also add old R2RML for AccessFactory property + store.addQuad(logicalSource, new NamedNode(RML + "source"), + database + ); + store.tryPropertyTranslation(logicalTable, new NamedNode(RR + "sqlQuery"), logicalSource, new NamedNode(RML + "query")); + store.tryPropertyTranslation(logicalTable, new NamedNode(RR + "tableName"), 
logicalSource, new NamedNode(RR + "tableName")); + store.tryPropertyTranslation(logicalTable, new NamedNode(RR + "sqlVersion"), logicalSource, new NamedNode(RR + "sqlVersion")); + + // Rename on whole store instead of deep search in TriplesMap Resource + store.renameAllPredicates(new NamedNode(RR + "column"), new NamedNode(RML + "reference")); + store.removeQuads(triplesMap, new NamedNode(RR + "logicalTable"), null); } -// } -// else { -// database = store.getQuad(null, null, new NamedNode(D2RQ + "Database")).getSubject(); -// } - - // Add logical source - String logicalSourceIRI = triplesMap.getValue() + "_logicalSource"; - Term logicalSource = new NamedNode(logicalSourceIRI); - - store.addQuad(triplesMap, new NamedNode(RML + "logicalSource"), logicalSource, null); - store.addQuad(logicalSource, new NamedNode(RML + "referenceFormulation"), - new NamedNode(NAMESPACES.QL + "CSV") - ); - - // Also add old R2RML for AccessFactory property - store.addQuad(logicalSource, new NamedNode(RML + "source"), - database - ); - store.tryPropertyTranslation(logicalTable, new NamedNode(RR + "sqlQuery"), logicalSource, new NamedNode(RML + "query")); - store.tryPropertyTranslation(logicalTable, new NamedNode(RR + "tableName"), logicalSource, new NamedNode(RR + "tableName")); - store.tryPropertyTranslation(logicalTable, new NamedNode(RR + "sqlVersion"), logicalSource, new NamedNode(RR + "sqlVersion")); - - // Rename on whole store instead of deep search in TriplesMap Resource - store.renameAll(new NamedNode(RR + "column"), new NamedNode(RML + "reference")); - store.removeQuads(triplesMap, new NamedNode(RR + "logicalTable"), null); } } diff --git a/src/main/java/be/ugent/rml/conformer/RMLConverterNew.java b/src/main/java/be/ugent/rml/conformer/RMLConverterNew.java new file mode 100644 index 00000000..1fadaec4 --- /dev/null +++ b/src/main/java/be/ugent/rml/conformer/RMLConverterNew.java @@ -0,0 +1,344 @@ +package be.ugent.rml.conformer; + +import 
be.ugent.idlab.knows.dataio.access.DatabaseType; +import be.ugent.rml.records.ReferenceFormulation; +import be.ugent.rml.store.Quad; +import be.ugent.rml.store.QuadStore; +import be.ugent.rml.term.BlankNode; +import be.ugent.rml.term.Literal; +import be.ugent.rml.term.NamedNode; +import be.ugent.rml.term.Term; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +import static be.ugent.rml.NAMESPACES.*; + +public class RMLConverterNew implements Converter { + + private final static Logger logger = LoggerFactory.getLogger(RMLConverterNew.class); + + private final Map replacementsObjects = new HashMap<>() {{ + /* Old RML */ + put(RML + "BaseSource", new ReplacementEntry(RML2 + "LogicalSource", null)); + /* R2RML */ + put(RR + "BaseTableOrView", new ReplacementEntry(RML2 + "LogicalSource", null)); + put(RR + "Literal", new ReplacementEntry(RML2 + "Literal", null)); + put(RR + "R2RMLView", new ReplacementEntry(RML2 + "LogicalSource", null)); + put(RR + "SQL2008", new ReplacementEntry(RML2 + "SQL2008", null)); + + }}; + + private final Map replacementsPredicates = new HashMap<>() {{ + put(RML + "source", new ReplacementEntry(RML2 + "source", (quad, quadStore) -> processSources(quad, quadStore))); + put(RML + "query", new ReplacementEntry(RML2 + "query", (quad, quadStore) -> processQueries(quad, quadStore))); + put(RR + "column", new ReplacementEntry(RML2 + "reference", null)); + put(RR + "SQL2008", new ReplacementEntry(RML2 + "SQL2008", null)); + put(RR + "logicalTable", new ReplacementEntry(RML2 + "logicalSource", (quad, quadStore) -> processLogicalTable(quad, quadStore))); + put(RR + "sqlVersion", new ReplacementEntry(RML2 + "referenceFormulation", null)); + put(RR + "tableName", new ReplacementEntry(RML2 + "source", RMLConverterNew::processTableName)); + put(RR + "Literal", new ReplacementEntry(RML2 + "Literal", RMLConverterNew::processTableName)); + }}; + + private final Map objectRenames = new HashMap<>() {{ + put(QL + "CSV", 
ReferenceFormulation.CSV); + put(QL + "JSONPath", ReferenceFormulation.JSONPath); + put(QL + "XPath", ReferenceFormulation.XPath); + put(RML + "LogicalSource", RML2 + "LogicalSource"); + put(RR + "Literal", RML2 + "Literal"); + put(RML + "TriplesMap", RML2 + "TriplesMap"); + put(RR + "BlankNode", RML2 + "BlankNode"); + put(RR + "IRI", RML2 + "IRI"); + put(RR + "Join", RML2 + "Join"); + put(RR + "PredicateMap", RML2 + "PredicateMap"); + put(RR + "PredicateObjectMap", RML2 + "PredicateObjectMap"); + put(RR + "RefObjectMap", RML2 + "RefObjectMap"); + put(RR + "SubjectMap", RML2 + "SubjectMap"); + put(RR + "ObjectMap", RML2 + "ObjectMap"); + put(RR + "TermMap", RML2 + "TermMap"); + put(RR + "TriplesMap", RML2 + "TriplesMap"); + put(RR + "GraphMap", RML2 + "GraphMap"); + put(RR + "defaultGraph", RML2 + "defaultGraph"); + put(RML + "LanguageMap", RML2 + "LanguageMap"); + put(RMLT + "LogicalTarget", RML2 + "LogicalTarget"); + put(RMLT + "Target", RML2 + "Target"); + }}; + + /* + TODO: figure out not included entries: + - rr:inverseExpression: what to do with the deprecated tag? 
+ */ + private final Map predicateRenames = new HashMap<>() {{ + /* Old RML */ + put(RML + "iterator", RML2 + "iterator"); + put(RML + "logicalSource", RML2 + "logicalSource"); + put(RML + "logicalTarget", RML2 + "logicalTarget"); + put(RML + "reference", RML2 + "reference"); + put(RML + "referenceFormulation", RML2 + "referenceFormulation"); + put(RML + "languageMap", RML2 + "languageMap"); + put(RML + "parentTermMap", RML2 + "parentTermMap"); + + /* Old RMLT */ + put(RMLT + "target", RML2 + "target"); + put(RMLT + "serialization", RML2 + "serialization"); + put(RMLT + "compression", RML2 + "compression"); + + /* R2RML */ + put(RR + "joinCondition", RML2 + "joinCondition"); + put(RR + "parent", RML2 + "parent"); + put(RR + "child", RML2 + "child"); + put(RR + "parentTriplesMap", RML2 + "parentTriplesMap"); + + put(RR + "column", RML2 + "reference"); + put(RR + "class", RML2 + "class"); + put(RR + "constant", RML2 + "constant"); + put(RR + "datatype", RML2 + "datatype"); + put(RR + "graph", RML2 + "graph"); + put(RR + "graphMap", RML2 + "graphMap"); + put(RR + "language", RML2 + "language"); + put(RR + "object", RML2 + "object"); + put(RR + "objectMap", RML2 + "objectMap"); + put(RR + "predicate", RML2 + "predicate"); + put(RR + "predicateMap", RML2 + "predicateMap"); + put(RR + "predicateObjectMap", RML2 + "predicateObjectMap"); + put(RR + "subject", RML2 + "subject"); + put(RR + "subjectMap", RML2 + "subjectMap"); + put(RR + "termType", RML2 + "termType"); + put(RR + "template", RML2 + "template"); + put(RR + "logicalTable", RML2 + "logicalSource"); + }}; + private final Set obsoletes = new HashSet<>() {{ + add(RR + "sqlVersion"); + }}; + private final QuadStore store; + + public RMLConverterNew(QuadStore store) { + this.store = store; + } + + private static void processTableName(Quad tableName, QuadStore store) { + store.addQuad(tableName.getSubject(), new NamedNode(RML2 + "referenceFormulation"), new NamedNode(RML2 + "SQL2008Table")); + 
store.removeQuads(tableName.getSubject(), new NamedNode(RML2 + "referenceFormulation"), null); + } + + /** + * Replace a logical table quad with a proper logical source + * @param logicalTableQuad + * @param quadStore + */ + private void processLogicalTable(Quad logicalTableQuad, QuadStore quadStore) { + BlankNode blank = new BlankNode(); + quadStore.addQuad(logicalTableQuad.getSubject(), new NamedNode(RML2 + "logicalSource"), blank); + + // add the reference formulation of the logical source + quadStore.removeQuads(logicalTableQuad.getSubject(), new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBTable)); + + // now have blank contain all required fields + // translate the table name as rml:source + Term logicalTable = logicalTableQuad.getObject(); + Term tableName = quadStore.getQuads(logicalTable, new NamedNode(RR + "tableName"), null) + .get(0) + .getObject(); + + quadStore.addQuad(blank, new NamedNode(RML2 + "source"), tableName); + quadStore.addQuad(blank, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBTable)); + quadStore.removeQuads(logicalTableQuad); + } + + @Override + public void convert(Map mappingOptions) throws Exception { + // inject DB sources for every triple map that has a logicalTable as source + if (mappingOptions != null) { + // R2RML conversion + // convert all logical tables into proper logical sources + List logicalTableMaps = this.store.getQuads(null, new NamedNode(RR + "logicalTable"), null); + for (Quad map : logicalTableMaps) { + // insert the database for the logical source + Term database = new NamedNode(map.getSubject().getValue() + "_database"); + this.store.addQuad(database, new NamedNode(RDF + "type"), new NamedNode(D2RQ + "Database")); + for (Map.Entry entry : mappingOptions.entrySet()) { + this.store.addQuad(database, new NamedNode(D2RQ + entry.getKey()), new Literal(entry.getValue())); + + if (entry.getKey().equals("jdbcDSN")) { + DatabaseType type = 
DatabaseType.getDBtype(entry.getValue()); + this.store.addQuad(database, new NamedNode(D2RQ + "jdbcDriver"), new Literal(type.getDriver())); + } + } + + Term logicalSource = new NamedNode(map.getSubject().getValue() + "_logicalSource"); + this.store.addQuad(logicalSource, new NamedNode(RML2+"source"), database); + + // translate rr:logicalTable to rml:source + // grab the logical table + Term logicalTable = map.getObject(); + List tableNames = this.store.getQuads(logicalTable, new NamedNode(RR + "tableName"), null); + if (tableNames.isEmpty()) { + // no tableNames present, SQL query must be present + List queries = this.store.getQuads(logicalTable, new NamedNode(RR + "sqlQuery"), null); + if (queries.isEmpty()) { + throw new IllegalArgumentException("Logical table contains neither a tableName, nor a SQL query"); + } + this.store.addQuad(logicalSource, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBQuery)); + this.store.addQuad(logicalSource, new NamedNode(RML2 + "iterator"), new Literal(queries.get(0).getObject().getValue())); + + this.store.removeQuads(logicalTable, new NamedNode(RR + "sqlQuery"), null); + } else { + this.store.addQuad(logicalSource, new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBTable)); + this.store.addQuad(logicalSource, new NamedNode(RML2 + "iterator"), new Literal(tableNames.get(0).getObject().getValue())); + + this.store.removeQuads(logicalTable, new NamedNode(RR + "tableName"), null); + } + + + // connect the logical source to the map + this.store.addQuad(map.getSubject(), new NamedNode(RML2 + "logicalSource"), logicalSource); + + // clean up the store: remove logicalTable and tableName + this.store.removeQuads(map.getSubject(), new NamedNode(RR + "logicalTable"), null); + + } + + // convert all logical sources that have a rr:tableName into proper sources + List tableNameLogicalSources = this.store.getQuads(null, new NamedNode(RR + "tableName"), null); + for (Quad ls 
: tableNameLogicalSources) { + // drop the sql version + // set the correct reference formulation + this.store.addQuad(ls.getSubject(), new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBTable)); + // put table name in rml:iterator + String tableName = ls.getObject().getValue(); + this.store.addQuad(ls.getSubject(), new NamedNode(RML2 + "iterator"), new Literal(tableName)); + // drop obsolete fields + this.store.removeQuads(ls.getSubject(), new NamedNode(RR + "sqlVersion"), null); + this.store.removeQuads(ls); + } + + // convert all logical sources that have a rml:query to proper sources + List queryLogicalSources = this.store.getQuads(null, new NamedNode(RML + "query"), null); + for (Quad ls : queryLogicalSources) { + Term source = null; + if (this.store.contains(ls.getSubject(), new NamedNode(RML + "source"), null)) { + source = this.store.getQuad(ls.getSubject(), new NamedNode(RML + "source"), null).getObject(); + } + // drop any obsolete reference formulations + this.store.removeQuads(ls.getSubject(), new NamedNode(RML + "referenceFormulation"), null); + // set the proper reference formulation + if (source != null && this.store.contains(source, new NamedNode(SD + "resultFormat"), null)) { + Term resultsFormat = this.store.getQuad(source, new NamedNode(SD + "resultFormat"), null).getObject(); + this.store.addQuad(ls.getSubject(), new NamedNode(RML2 + "referenceFormulation"), resultsFormat); + } else { + this.store.addQuad(ls.getSubject(), new NamedNode(RML2 + "referenceFormulation"), new NamedNode(ReferenceFormulation.RDBQuery)); + } + // set the query into the iterator + this.store.addQuad(ls.getSubject(), new NamedNode(RML2 + "iterator"), ls.getObject()); + + // drop the obsolete quads + this.store.removeQuads(ls.getSubject(), new NamedNode(RR + "sqlVersion"), null); + this.store.removeQuads(ls); + } + } + + + for (Map.Entry e : this.replacementsPredicates.entrySet()) { + List quads = this.store.getQuads(null, new 
NamedNode(e.getKey()), null); + ReplacementEntry entry = e.getValue(); + for (Quad q : quads) { + if (entry.function != null) { + entry.function.call(q, this.store); + } else { + // apply the replace and warn + this.store.renameAllPredicates(new NamedNode(e.getKey()), new NamedNode(entry.replacementTerm)); + logger.warn("Predicate replacement function for term {} is not yet defined!", e.getKey()); + } + } + } + + for (Map.Entry e : this.replacementsObjects.entrySet()) { + // find all quads that carry this object + List quads = this.store.getQuads(null, null, new NamedNode(e.getKey())); + ReplacementEntry entry = e.getValue(); + for (Quad q : quads) { + this.store.addQuad(q.getSubject(), q.getPredicate(), new NamedNode(entry.replacementTerm)); + if (entry.function != null) { + entry.function.call(q, store); + } else { + logger.warn("Object replacement function for term {} is not yet defined!", e.getKey()); + } + } + this.store.removeQuads(quads); + } + + // apply simple renames first + for (Map.Entry e : this.predicateRenames.entrySet()) { + String old = e.getKey(); + String _new = e.getValue(); + + this.store.renameAllPredicates(new NamedNode(old), new NamedNode(_new)); + } + + for (Map.Entry e : this.objectRenames.entrySet()) { + String old = e.getKey(); + String _new = e.getValue(); + + this.store.renameAllObjects(new NamedNode(old), new NamedNode(_new)); + } + + /* Replace namespaces */ + this.store.removeNameSpace("rml"); + this.store.removeNameSpace("rr"); + this.store.removeNameSpace("ql"); + this.store.addNameSpace("rml", "http://w3id.org/rml/"); + + dropObsolete(); + } + + private void processSources(Quad source, QuadStore store) { + if (source.getObject().isLiteral()) { + String path = source.getObject().getValue(); + BlankNode node = new BlankNode(); + store.addQuad(node, new NamedNode(RDF + "type"), new NamedNode(DCAT + "Distribution")); + store.addQuad(node, new NamedNode(RDF + "type"), new NamedNode(RML2 + "Source")); + store.addQuad(node, new 
NamedNode(DCAT + "downloadURL"), new Literal(path)); // TODO: file:// prefix + store.addQuad(source.getSubject(), new NamedNode(RML2 + "source"), node); + store.removeQuads(source.getSubject(), source.getPredicate(), source.getObject()); + } + store.renameAllPredicates(new NamedNode(RML + "source"), new NamedNode(RML2 + "source")); + } + + private void processQueries(Quad query, QuadStore store) throws Exception { + Term source = store.getQuad(query.getSubject(), new NamedNode(RML2 + "source"), null).getObject(); + if (store.contains(source, new NamedNode(SD + "resultFormat"), null)) { + Term supportedLanguage = store.getQuad(source, new NamedNode(SD + "resultFormat"), null).getObject(); + store.addQuad(query.getSubject(), new NamedNode(RML2 + "referenceFormulation"), supportedLanguage); + } + else { + store.addQuad(query.getSubject(), new NamedNode(RML2 + "referenceFormulation"), new NamedNode(RML2 + "SQL2008Query")); + } + + store.removeQuads(query.getSubject(), new NamedNode(RML + "referenceFormulation"), null); + store.removeQuads(query.getSubject(), new NamedNode(RML + "iterator"), null); + store.renameAllPredicates(new NamedNode(RML + "query"), new NamedNode(RML2 + "iterator")); + } + + private void dropObsolete() { + for (String obsolete : obsoletes) { + this.store.removeQuads(null, new NamedNode(obsolete), null); + } + } + + /** + * Serves as a function to run when converting terms that are replaced by another term + */ + @FunctionalInterface + private interface ReplaceFunction { + void call(Quad quad, QuadStore store) throws Exception; + } + + /** + * Private record to contain the term to replace and a function to further execute on the QuadStore + * @param replacementTerm term to be put instead of the previous one + * @param function function to run at replacement time + */ + private record ReplacementEntry(String replacementTerm, ReplaceFunction function) { + } +} diff --git a/src/main/java/be/ugent/rml/extractor/ConstantExtractor.java 
b/src/main/java/be/ugent/rml/extractor/ConstantExtractor.java index 3e9ab608..deec2645 100644 --- a/src/main/java/be/ugent/rml/extractor/ConstantExtractor.java +++ b/src/main/java/be/ugent/rml/extractor/ConstantExtractor.java @@ -1,31 +1,46 @@ package be.ugent.rml.extractor; +import be.ugent.idlab.knows.dataio.record.Record; import be.ugent.rml.functions.SingleRecordFunctionExecutor; -import be.ugent.rml.records.Record; import java.io.IOException; -import java.util.ArrayList; import java.util.List; public class ConstantExtractor implements Extractor, SingleRecordFunctionExecutor { - private String constant; + private final String constant; + private final List constantList; + + /** + * Becomes true when a function is detected that needs a special marker to indicate "End-of-File" (EOF). + */ + private final boolean needsEOFMarker; public ConstantExtractor(String constant) { + this. constantList = List.of(constant); this.constant = constant; + needsEOFMarker = constant.equals("https://w3id.org/imec/idlab/function#implicitDelete") + || constant.equals("http://example.com/idlab/function/implicitDelete"); } @Override public List extract(Record record) { - ArrayList result = new ArrayList<>(); - result.add(constant); - - return result; + return this.constantList; } @Override public Object execute(Record record) throws IOException { - return extract(record); + return this.constant; + } + + /** + * Returns {@code true} if this extractor needs an End-of-File (EOF) marker the end of the dataset. + * At this moment only required if function https://w3id.org/imec/idlab/function#implicitDelete is used. + * @return {@code true} if an EOF marker is required. 
+ */ + @Override + public boolean needsEOFMarker() { + return needsEOFMarker; } /** diff --git a/src/main/java/be/ugent/rml/extractor/Extractor.java b/src/main/java/be/ugent/rml/extractor/Extractor.java index 883fe098..5ea0f4d1 100644 --- a/src/main/java/be/ugent/rml/extractor/Extractor.java +++ b/src/main/java/be/ugent/rml/extractor/Extractor.java @@ -1,6 +1,7 @@ package be.ugent.rml.extractor; -import be.ugent.rml.records.Record; + +import be.ugent.idlab.knows.dataio.record.Record; import java.util.List; diff --git a/src/main/java/be/ugent/rml/extractor/HashExtractor.java b/src/main/java/be/ugent/rml/extractor/HashExtractor.java new file mode 100644 index 00000000..8ad6e808 --- /dev/null +++ b/src/main/java/be/ugent/rml/extractor/HashExtractor.java @@ -0,0 +1,34 @@ +package be.ugent.rml.extractor; + +import be.ugent.idlab.knows.dataio.record.Record; +import be.ugent.rml.functions.SingleRecordFunctionExecutor; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +public class HashExtractor implements Extractor, SingleRecordFunctionExecutor { + + public HashExtractor() { + } + + @Override + public List extract(Record record) { + return Collections.singletonList(Integer.toString(record.hashCode())); + } + + @Override + public Object execute(Record record) throws IOException { + return extract(record); + } + + /** + * to String method + * + * @return string + */ + @Override + public String toString() { + return "HashExtractor"; + } +} diff --git a/src/main/java/be/ugent/rml/extractor/ReferenceExtractor.java b/src/main/java/be/ugent/rml/extractor/ReferenceExtractor.java index 149e6f57..a2baa185 100644 --- a/src/main/java/be/ugent/rml/extractor/ReferenceExtractor.java +++ b/src/main/java/be/ugent/rml/extractor/ReferenceExtractor.java @@ -1,23 +1,24 @@ package be.ugent.rml.extractor; +import be.ugent.idlab.knows.dataio.record.Record; +import be.ugent.idlab.knows.dataio.record.RecordValue; import 
be.ugent.rml.functions.SingleRecordFunctionExecutor; -import be.ugent.rml.records.Record; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.List; public class ReferenceExtractor implements Extractor, SingleRecordFunctionExecutor { public String reference; - private boolean ignoreDoubleQuotes; + private final boolean ignoreDoubleQuotes; + private final boolean strictReferenceResolution; - public ReferenceExtractor(String reference, boolean ignoreDoubleQuotes) { + public ReferenceExtractor(String reference, boolean ignoreDoubleQuotes, boolean strictReferenceResolution) { this.reference = reference; this.ignoreDoubleQuotes = ignoreDoubleQuotes; - } - - public ReferenceExtractor(String reference) { - this(reference, false); + this.strictReferenceResolution = strictReferenceResolution; } @Override @@ -28,7 +29,21 @@ public List extract(Record record) { temp = temp.substring(1, temp.length() - 1); } - return record.get(temp); + RecordValue recordValue = record.get(temp); + + if (recordValue.isOk()) { // This means no error occurred during reference resolving and the value is not a null value + Object value = recordValue.getValue(); + if (value instanceof Iterable) { + return new ArrayList<>((Collection) value); + } else { + return List.of(value); + } + } else if (recordValue.isEmpty() || // The record has a null value + recordValue.isNotFound() && !strictReferenceResolution) { // The reference has not been found (e.g. 
nu field with that name) + return List.of(); + } else { + throw new IllegalArgumentException(recordValue.getMessage()); + } } @Override @@ -40,4 +55,8 @@ public String toString() { public Object execute(Record record) throws IOException { return extract(record); } + + public String getReference(){ + return this.reference; + } } diff --git a/src/main/java/be/ugent/rml/functions/AbstractSingleRecordFunctionExecutor.java b/src/main/java/be/ugent/rml/functions/AbstractSingleRecordFunctionExecutor.java index 90b9944f..5073d892 100644 --- a/src/main/java/be/ugent/rml/functions/AbstractSingleRecordFunctionExecutor.java +++ b/src/main/java/be/ugent/rml/functions/AbstractSingleRecordFunctionExecutor.java @@ -1,17 +1,23 @@ package be.ugent.rml.functions; -import be.ugent.rml.records.Record; +import be.ugent.idlab.knows.dataio.record.Record; import java.util.HashMap; +import java.util.Map; public abstract class AbstractSingleRecordFunctionExecutor implements SingleRecordFunctionExecutor { protected MultipleRecordsFunctionExecutor functionExecutor; public Object execute(Record record) throws Exception { - HashMap recordsMap = new HashMap<>(); + Map recordsMap = new HashMap<>(); recordsMap.put("_default", record); return this.functionExecutor.execute(recordsMap); } + + @Override + public boolean needsEOFMarker() { + return functionExecutor.needsEOFMarker(); + } } diff --git a/src/main/java/be/ugent/rml/functions/ConcatFunction.java b/src/main/java/be/ugent/rml/functions/ConcatFunction.java index ed63aeb4..d6c0cf60 100644 --- a/src/main/java/be/ugent/rml/functions/ConcatFunction.java +++ b/src/main/java/be/ugent/rml/functions/ConcatFunction.java @@ -1,16 +1,17 @@ package be.ugent.rml.functions; +import be.ugent.idlab.knows.dataio.record.Record; import be.ugent.rml.Utils; import be.ugent.rml.extractor.ConstantExtractor; import be.ugent.rml.extractor.Extractor; import be.ugent.rml.extractor.ReferenceExtractor; -import be.ugent.rml.records.Record; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; + public class ConcatFunction implements SingleRecordFunctionExecutor { private static final Logger logger = LoggerFactory.getLogger(ConcatFunction.class); @@ -22,55 +23,41 @@ public ConcatFunction(List extractors, boolean encodeURI) { this.encodeURI = encodeURI; } - public ConcatFunction(List extractors) { - this(extractors, false); - } - @Override public List execute(Record record) { return concat(record); } private List concat(Record record) { - ArrayList results = new ArrayList<>(); + List results = new ArrayList<>(); results.add(""); //we only return a result when all elements of the template are found boolean allValuesFound = true; int referenceCount = 0; - String onlyConstants = ""; + StringBuilder onlyConstants = new StringBuilder(); //we iterate over all elements of the template, unless one is not found for (int i = 0; allValuesFound && i < extractors.size(); i++) { Extractor extractor = extractors.get(i); - - List extractedValues = new ArrayList<>(); - FunctionUtils.functionObjectToList(extractor.extract(record), extractedValues); + final boolean isReferenceExtractor = extractor instanceof ReferenceExtractor; + final boolean isConstantExtractor = extractor instanceof ConstantExtractor; + List extractedValues = FunctionUtils.functionObjectToList(extractor.extract(record)); if (!extractedValues.isEmpty()) { - ArrayList temp = new ArrayList<>(); - - for (int k = 0; k < results.size(); k ++) { - - for (int j = 0; j < extractedValues.size(); j ++) { - String result = results.get(k); - String value = extractedValues.get(j); - - if (encodeURI && extractor instanceof ReferenceExtractor) { - value = Utils.encodeURI(value); + List temp = new ArrayList<>(); + + for (String result : results) { + for (String value : extractedValues) { + if (isReferenceExtractor) { + if (encodeURI) + value = Utils.encodeURI(value); + referenceCount ++; + } else if (isConstantExtractor) { + 
onlyConstants.append(value); } - result += value; - - if (extractor instanceof ConstantExtractor) { - onlyConstants += value; - } - - temp.add(result); - } - - if (extractor instanceof ReferenceExtractor) { - referenceCount ++; + temp.add(result + value); } } @@ -78,14 +65,13 @@ private List concat(Record record) { } if (extractedValues.isEmpty()) { - logger.warn("Not all values for a template where found. More specific, the variable " + extractor + " did not provide any results."); + logger.warn("Not all values for a template where found. More specific, the variable {} did not provide any results.", extractor); allValuesFound = false; } } - if ((allValuesFound && referenceCount > 0 && results.contains(onlyConstants)) || !allValuesFound) { - results = new ArrayList<>(); - } + if (!allValuesFound || (referenceCount > 0 && results.contains(onlyConstants.toString()))) + return new ArrayList<>(); return results; } diff --git a/src/main/java/be/ugent/rml/functions/DynamicMultipleRecordsFunctionExecutor.java b/src/main/java/be/ugent/rml/functions/DynamicMultipleRecordsFunctionExecutor.java index 2ffb5c06..144c534f 100644 --- a/src/main/java/be/ugent/rml/functions/DynamicMultipleRecordsFunctionExecutor.java +++ b/src/main/java/be/ugent/rml/functions/DynamicMultipleRecordsFunctionExecutor.java @@ -1,41 +1,57 @@ package be.ugent.rml.functions; +import be.ugent.idlab.knows.dataio.record.Record; +import be.ugent.idlab.knows.functions.agent.Agent; +import be.ugent.idlab.knows.functions.agent.Arguments; import be.ugent.rml.NAMESPACES; -import be.ugent.rml.records.Record; import be.ugent.rml.term.NamedNode; import be.ugent.rml.term.Term; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.*; -import java.util.stream.Collectors; +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; public class DynamicMultipleRecordsFunctionExecutor implements MultipleRecordsFunctionExecutor { 
private static final Logger logger = LoggerFactory.getLogger(DynamicMultipleRecordsFunctionExecutor.class); - private List parameterValuePairs; - private FunctionLoader functionLoader; + private final List parameterValuePairs; + private final Agent functionAgent; - public DynamicMultipleRecordsFunctionExecutor(List parameterValuePairs, FunctionLoader functionLoader) { + + private boolean needsEOFMarker = false; + + public DynamicMultipleRecordsFunctionExecutor(final List parameterValuePairs, final Agent functionAgent) { this.parameterValuePairs = parameterValuePairs; - this.functionLoader = functionLoader; + this.functionAgent = functionAgent; + // check if executor contains term generator that needs an EOF marker + for (ParameterValueOriginPair parameterValuePair : parameterValuePairs) { + for (TermGeneratorOriginPair valueGeneratorPair : parameterValuePair.getValueGeneratorPairs()) { + if (valueGeneratorPair.getTermGenerator().needsEOFMarker()) { + needsEOFMarker = true; + return; + } + } + } } @Override public Object execute(Map records) throws Exception { - final ArrayList fnTerms = new ArrayList<>(); - final ArrayList args = new ArrayList<>(); + final List fnTerms = new ArrayList<>(); + final Arguments arguments = new Arguments(); + final Record child = records.get("child"); parameterValuePairs.forEach(pv -> { - ArrayList parameters = new ArrayList<>(); - ArrayList values = new ArrayList<>(); + List parameters = new ArrayList<>(); + List values = new ArrayList<>(); pv.getParameterGenerators().forEach(parameterGen -> { try { - parameters.addAll(parameterGen.generate(records.get("child"))); + parameters.addAll(parameterGen.generate(child)); } catch (Exception e) { - //todo be more nice and gentle - e.printStackTrace(); + logger.error(e.getMessage(), e); } }); @@ -43,72 +59,39 @@ public Object execute(Map records) throws Exception { try { values.addAll(pair.getTermGenerator().generate(records.get(pair.getOrigin()))); } catch (Exception e) { - //todo be more nice 
and gentle - e.printStackTrace(); + logger.error(e.getMessage(), e); } }); if (parameters.contains(new NamedNode(NAMESPACES.FNO + "executes")) || parameters.contains(new NamedNode(NAMESPACES.FNO_S + "executes"))) { if (parameters.contains(new NamedNode(NAMESPACES.FNO + "executes"))) { - logger.warn("http is used instead of https for " + NAMESPACES.FNO_S + ". " + - "Still works for now, but will be deprecated in the future."); + logger.warn("http is used instead of https for {}. Still works for now, but will be deprecated in the future.", NAMESPACES.FNO_S); } - fnTerms.add(values.get(0)); } else { - parameters.forEach(parameter -> { - ArrayList temp = new ArrayList<>(); - - values.forEach(value -> { - temp.add(value.getValue()); - }); - - args.add(new Argument(parameter.getValue(), temp)); - }); + for (Term parameter : parameters) { + for (Term value : values) { + arguments.add(parameter.getValue(), value.getValue()); + } + } } }); - final Map> mergedArgs = new HashMap<>(); - //TODO check if function is list? 
- args.forEach(arg -> { - if (!mergedArgs.containsKey(arg.getParameter())) { - mergedArgs.put(arg.getParameter(), arg.getArguments()); - } else { - mergedArgs.get(arg.getParameter()).addAll(arg.getArguments()); - } - }); if (fnTerms.isEmpty()) { - throw new Exception("No function was defined for parameters: " + mergedArgs.keySet()); + throw new Exception("No function was defined for parameters: " + arguments.getArgumentNames()); } else { - FunctionModel function = functionLoader.getFunction(fnTerms.get(0)); - return function.execute((Map) mergedArgs); + final String functionId = fnTerms.get(0).getValue(); + try { + return functionAgent.execute(functionId, arguments); + } catch (InvocationTargetException e) { + logger.error("Function '{}' failed to execute with {}", functionId, e.getTargetException().getMessage()); + return null; + } } } -} -/** - * Helper class to combine a parameter and his arguments in one object - */ -class Argument { - /** - * Function Parameter URI - */ - private String parameter; - /** - * All the actual generated values for this parameter - */ - private List arguments; - - Argument(String parameter, List arguments) { - this.parameter = parameter; - this.arguments = arguments; - } - - public String getParameter() { - return parameter; - } - - public List getArguments() { - return arguments; + @Override + public boolean needsEOFMarker() { + return needsEOFMarker; } -} +} \ No newline at end of file diff --git a/src/main/java/be/ugent/rml/functions/DynamicSingleRecordFunctionExecutor.java b/src/main/java/be/ugent/rml/functions/DynamicSingleRecordFunctionExecutor.java index 0d8f6393..294c31e5 100644 --- a/src/main/java/be/ugent/rml/functions/DynamicSingleRecordFunctionExecutor.java +++ b/src/main/java/be/ugent/rml/functions/DynamicSingleRecordFunctionExecutor.java @@ -1,23 +1,23 @@ package be.ugent.rml.functions; +import be.ugent.idlab.knows.functions.agent.Agent; + import java.util.ArrayList; import java.util.List; public class 
DynamicSingleRecordFunctionExecutor extends AbstractSingleRecordFunctionExecutor { - public DynamicSingleRecordFunctionExecutor(List parameterValuePairs, FunctionLoader functionLoader) { - ArrayList pairs = new ArrayList<>(); + public DynamicSingleRecordFunctionExecutor(List parameterValuePairs, final Agent functionAgent) { + List pairs = new ArrayList<>(); parameterValuePairs.forEach(pair -> { - ArrayList objectGeneratorOriginPairs = new ArrayList<>(); + List objectGeneratorOriginPairs = new ArrayList<>(); - pair.getValueGenerators().forEach(vGen -> { - objectGeneratorOriginPairs.add(new TermGeneratorOriginPair(vGen, "_default")); - }); + pair.getValueGenerators().forEach(vGen -> objectGeneratorOriginPairs.add(new TermGeneratorOriginPair(vGen, "_default"))); pairs.add(new ParameterValueOriginPair(pair.getParameterGenerators(), objectGeneratorOriginPairs)); }); - functionExecutor = new DynamicMultipleRecordsFunctionExecutor(pairs, functionLoader); + functionExecutor = new DynamicMultipleRecordsFunctionExecutor(pairs, functionAgent); } } \ No newline at end of file diff --git a/src/main/java/be/ugent/rml/functions/FunctionLoader.java b/src/main/java/be/ugent/rml/functions/FunctionLoader.java deleted file mode 100644 index 1b04ed5a..00000000 --- a/src/main/java/be/ugent/rml/functions/FunctionLoader.java +++ /dev/null @@ -1,232 +0,0 @@ -package be.ugent.rml.functions; - -import be.ugent.rml.NAMESPACES; -import be.ugent.rml.functions.lib.IDLabFunctions; -import be.ugent.rml.store.RDF4JStore; -import be.ugent.rml.term.NamedNode; -import be.ugent.rml.term.Term; -import be.ugent.rml.Utils; -import be.ugent.rml.functions.lib.UtilFunctions; -import be.ugent.rml.store.QuadStore; -import org.eclipse.rdf4j.rio.RDFFormat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public 
class FunctionLoader { - - private static final Logger logger = LoggerFactory.getLogger(FunctionLoader.class); - - private final QuadStore functionDescriptionTriples; - - // updated dynamically - /** - * Cache for loaded classes - */ - private Map classMap; - /** - * Cache for library paths - */ - private Map libraryMap; - /** - * Cache for loaded functions - */ - private Map loadedMethods; - - public FunctionLoader() throws Exception { - this(null, null); - } - - public FunctionLoader(QuadStore functionDescriptionTriples) throws Exception { - this(functionDescriptionTriples, null); - } - - public FunctionLoader(QuadStore functionDescriptionTriples, Map libraryMap) throws Exception { - if (functionDescriptionTriples == null) { - functionDescriptionTriples = new RDF4JStore(); - functionDescriptionTriples.read(Utils.getInputStreamFromFile(Utils.getFile("functions_idlab.ttl")), null, RDFFormat.TURTLE); - functionDescriptionTriples.read(Utils.getInputStreamFromFile(Utils.getFile("functions_grel.ttl")), null, RDFFormat.TURTLE); - functionDescriptionTriples.read(Utils.getInputStreamFromFile(Utils.getFile("grel_java_mapping.ttl")), null, RDFFormat.TURTLE); - } - - this.functionDescriptionTriples = functionDescriptionTriples; - - this.libraryMap = new HashMap<>(); - - if (libraryMap == null) { - this.classMap = new HashMap<>(); - this.classMap.put("IDLabFunctions", IDLabFunctions.class); - this.classMap.put("io.fno.grel.ArrayFunctions", io.fno.grel.ArrayFunctions.class); - this.classMap.put("io.fno.grel.BooleanFunctions", io.fno.grel.BooleanFunctions.class); - this.classMap.put("io.fno.grel.ControlsFunctions", io.fno.grel.ControlsFunctions.class); - this.classMap.put("io.fno.grel.StringFunctions", io.fno.grel.StringFunctions.class); - this.libraryMap.put("IDLabFunctions", "__local"); - this.libraryMap.put("io.fno.grel.ArrayFunctions", "__local"); - this.libraryMap.put("io.fno.grel.BooleanFunctions", "__local"); - this.libraryMap.put("io.fno.grel.ControlsFunctions", 
"__local"); - this.libraryMap.put("io.fno.grel.StringFunctions", "__local"); - } else { - this.classMap = libraryMap; - for (String key : libraryMap.keySet()) { - this.libraryMap.put(key, "__local"); - } - } - - this.classMap.put("UtilFunctions", UtilFunctions.class); - this.libraryMap.put("UtilFunctions", "__local"); - - this.loadedMethods = new HashMap<>(); - } - - public FunctionModel getFunction(Term iri) throws IOException { - if (!this.loadedMethods.containsKey(iri)) { - logger.debug("Loading function: " + iri); - try { - findMethodOldWay(iri); - logger.warn("Found a function using the old `lib:` way, this is deprecated"); - } catch (IOException e) { - findMethodNewWay(iri); - } - } - - return this.loadedMethods.get(iri); - } - - public String getLibraryPath(String className) { - return this.libraryMap.get(className); - } - - private void findMethodOldWay(Term iri) throws IOException { - List libraries = Utils.getObjectsFromQuads(this.functionDescriptionTriples.getQuads(iri, new NamedNode(NAMESPACES.LIB + "providedBy"), null)); - - if (libraries.size() > 0) { - List pathNames = Utils.getObjectsFromQuads(this.functionDescriptionTriples.getQuads(libraries.get(0), new NamedNode(NAMESPACES.LIB + "localLibrary"), null)); - List classes = Utils.getObjectsFromQuads(this.functionDescriptionTriples.getQuads(libraries.get(0), new NamedNode(NAMESPACES.LIB + "class"), null)); - - if (pathNames.size() > 0 && classes.size() > 0) { - String pathName = pathNames.get(0).getValue(); - String className = classes.get(0).getValue(); - Class cls; - if (this.classMap.containsKey(className)) { - cls = this.classMap.get(className); - } else { - File functionFile = Utils.getFile(pathName); - cls = FunctionUtils.functionRequire(functionFile, className); - this.classMap.put(className, cls); - this.libraryMap.put(className, functionFile.getCanonicalPath()); - } - - List parameters = new ArrayList<>(); - List expectList = 
Utils.getObjectsFromQuads(FunctionUtils.getQuadsByFunctionPrefix(this.functionDescriptionTriples, iri, "expects", null)); - - if (expectList.size() > 0) { - parameters = Utils.getList(this.functionDescriptionTriples, expectList.get(0)); - } - - List methods = Utils.getObjectsFromQuads(this.functionDescriptionTriples.getQuads(libraries.get(0), new NamedNode(NAMESPACES.LIB + "method"), null)); - - List outputs = Utils.getList(this.functionDescriptionTriples, Utils.getObjectsFromQuads(FunctionUtils.getQuadsByFunctionPrefix(this.functionDescriptionTriples, iri, "returns", null)).get(0)); - // Validation of parameters within FunctionUtils.getFunctionParameterUris - List fnParameterUris = FunctionUtils.getFunctionParameterUris(this.functionDescriptionTriples, parameters); - List fnOutputUris = FunctionUtils.getFunctionParameterUris(this.functionDescriptionTriples, outputs); - - Class[] orderedParameters = FunctionUtils.parseFunctionParameters(this.functionDescriptionTriples, parameters); - if (methods.size() > 0) { - Method fn = null; - try { - fn = cls.getMethod(methods.get(0).getValue(), orderedParameters); - } catch (NoSuchMethodException e) { - throw new IOException("Declared method " + methods.get(0) + " does not exist for class " + classes.get(0) + "."); - } - - FunctionModel fnm = new FunctionModel(iri, fn, fnParameterUris, fnOutputUris); - - this.loadedMethods.put(iri, fnm); - } - } else { - throw new IOException("No library or class was found for the function with IRI " + iri + " in the function descriptions."); - } - } else { - throw new IOException("No library or class was found for the function with IRI " + iri + " in the function descriptions."); - } - } - - private void findMethodNewWay(Term iri) throws IOException { - List mappings = Utils.getSubjectsFromQuads(FunctionUtils.getQuadsByFunctionPrefix(this.functionDescriptionTriples, null, "function", iri)); - - if (mappings.size() == 0) { - throw new IOException("No mapping was found for the function with 
IRI " + iri + " in the function descriptions."); - } - - List libraries = Utils.getObjectsFromQuads(FunctionUtils.getQuadsByFunctionPrefix(this.functionDescriptionTriples, mappings.get(0), "implementation", null)); - - if (libraries.size() == 0) { - throw new IOException("No library was found for the mapping with IRI " + mappings.get(0) + " in the function descriptions."); - } - List pathNames = Utils.getObjectsFromQuads(this.functionDescriptionTriples.getQuads(libraries.get(0), new NamedNode(NAMESPACES.DOAP + "download-page"), null)); - List classes = Utils.getObjectsFromQuads(this.functionDescriptionTriples.getQuads(libraries.get(0), new NamedNode(NAMESPACES.FNOI + "class-name"), null)); - - if (pathNames.size() == 0 || classes.size() == 0) { - throw new IOException("No path or class found for the library with IRI " + libraries.get(0) + " in the function descriptions."); - } - - String pathName = pathNames.get(0).getValue(); - String className = classes.get(0).getValue(); - Class cls; - - if (this.classMap.containsKey(className)) { - cls = this.classMap.get(className); - } else { - File functionFile = Utils.getFile(pathName); - cls = FunctionUtils.functionRequire(functionFile, className); - this.classMap.put(className, cls); - this.libraryMap.put(className, functionFile.getCanonicalPath()); - } - - List parameters = new ArrayList<>(); - List expectList = Utils.getObjectsFromQuads(FunctionUtils.getQuadsByFunctionPrefix(this.functionDescriptionTriples, iri, "expects", null)); - - if (expectList.size() > 0) { - parameters = Utils.getList(this.functionDescriptionTriples, expectList.get(0)); - } - - - List returns = Utils.getObjectsFromQuads(FunctionUtils.getQuadsByFunctionPrefix(this.functionDescriptionTriples, iri, "returns", null)); - if (returns.isEmpty()) { - throw new IOException("Missing " + NAMESPACES.FNO_S + "returns for " + iri + " in the function descriptions."); - } - List outputs = Utils.getList(this.functionDescriptionTriples, returns.get(0)); - - List 
methodMappings = Utils.getObjectsFromQuads(FunctionUtils.getQuadsByFunctionPrefix(this.functionDescriptionTriples, mappings.get(0), "methodMapping", null)); - if (methodMappings.size() == 0) { - throw new IOException("No methodmapping found for the mapping with IRI " + mappings.get(0) + " in the function descriptions."); - } - - List methods = Utils.getObjectsFromQuads(this.functionDescriptionTriples.getQuads(methodMappings.get(0), new NamedNode(NAMESPACES.FNOM + "method-name"), null)); - if (methods.size() == 0) { - throw new IOException("No method found for the mapping with IRI " + mappings.get(0) + " in the function descriptions."); - } - - Class[] orderedParameters = FunctionUtils.parseFunctionParameters(this.functionDescriptionTriples, parameters); - Method fn = null; - try { - fn = cls.getDeclaredMethod(methods.get(0).getValue(), orderedParameters); - } catch (NoSuchMethodException e) { - throw new IOException("Declared method " + methods.get(0) + " does not exist for class " + classes.get(0) + "."); - } - - List fnParameterUris = FunctionUtils.getFunctionParameterUris(this.functionDescriptionTriples, parameters); - List fnOutputUris = FunctionUtils.getFunctionParameterUris(this.functionDescriptionTriples, outputs); - - FunctionModel fnm = new FunctionModel(iri, fn, fnParameterUris, fnOutputUris); - - this.loadedMethods.put(iri, fnm); - } -} diff --git a/src/main/java/be/ugent/rml/functions/FunctionModel.java b/src/main/java/be/ugent/rml/functions/FunctionModel.java index 2acb503e..aabd5840 100644 --- a/src/main/java/be/ugent/rml/functions/FunctionModel.java +++ b/src/main/java/be/ugent/rml/functions/FunctionModel.java @@ -60,7 +60,7 @@ private Object[] getParameters(Map parameters) { if (parameters.get(this.parameters.get(i).getValue()) != null) { args[i] = parseParameter(parameters.get(this.parameters.get(i).getValue()), paramTypes[i].getTypeName()); } else { - logger.debug("No argument was found for following parameter: " + 
this.parameters.get(i).getValue()); + logger.debug("No argument was found for following parameter: {}", this.parameters.get(i).getValue()); args[i] = null; } } diff --git a/src/main/java/be/ugent/rml/functions/FunctionUtils.java b/src/main/java/be/ugent/rml/functions/FunctionUtils.java index 2a1c0ef4..eae47a12 100644 --- a/src/main/java/be/ugent/rml/functions/FunctionUtils.java +++ b/src/main/java/be/ugent/rml/functions/FunctionUtils.java @@ -1,82 +1,10 @@ package be.ugent.rml.functions; -import be.ugent.rml.NAMESPACES; -import be.ugent.rml.Utils; -import be.ugent.rml.store.Quad; -import be.ugent.rml.store.QuadStore; -import be.ugent.rml.term.NamedNode; -import be.ugent.rml.term.Term; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.tools.JavaCompiler; -import javax.tools.ToolProvider; -import java.io.File; -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLClassLoader; -import java.time.*; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; public class FunctionUtils { - private static final Logger logger = LoggerFactory.getLogger(FunctionUtils.class); - - public static Class functionRequire(File file, String className) throws IOException { - String path = file.getPath(); - if (path.endsWith(".jar")) { - return FunctionUtils.getClass(file, className, "application/java-archive"); - } else if (path.endsWith(".java")) { - return FunctionUtils.getClass(file, className, "text/x-java-source"); - } - - throw new IOException("Not a valid path for a JAVA implementation: " + path); - } - - /** - * Returns and validates parameters - * @param store - * @param parameterResources - * @return - */ - public static List getFunctionParameterUris(QuadStore store, List parameterResources) { - List parameterPredicates = new ArrayList<>(); - - try { - for (Term subject : parameterResources) { - parameterPredicates.add(Utils.getObjectsFromQuads(getQuadsByFunctionPrefix(store, 
subject, "predicate", null)).get(0)); - } - } catch (Exception e) { - logger.error("Missing function parameters in {}", parameterResources); - } - - return parameterPredicates; - } - - public static Class[] parseFunctionParameters(QuadStore store, List parameterResources) - throws IOException { - Class[] args = new Class[parameterResources.size()]; - - for (int i = 0; i < parameterResources.size(); i++) { - Term subject = parameterResources.get(i); - List types = Utils.getObjectsFromQuads(getQuadsByFunctionPrefix(store, subject, "type", null)); - if (types.isEmpty()) { - throw new IOException("Missing " + NAMESPACES.FNO_S + "type for " + subject + " in function descriptions."); - } - Term type = types.get(0); - - try { - args[i] = FunctionUtils.getParamType(type); - } catch (Exception e) { - args[i] = String.class; - } - } - return args; - } - /** * Generates strings from a function object. Possible lists/sets/bags/... in the object are unrolled recursively * and a string value is generated from each "simple" (i.e., not a list/set/bag/...) child object. @@ -84,13 +12,18 @@ public static Class[] parseFunctionParameters(QuadStore store, List par * @param o Function object, can be iterable. * @param result A string list to which string values of objects are added */ - public static void functionObjectToList(Object o, List result) { + public static List functionObjectToList(Object o) { + final List result = new ArrayList<>(); if (o != null) { // if o has child objects, recursively call this function on each child if (o instanceof Iterable) { ((Iterable) o).forEach(item -> { - functionObjectToList(item, result); + result.addAll(functionObjectToList(item)); }); + // Some functions return a regular Array, not an Iterable, handle those as well. 
+ } else if (o instanceof Object[]) { + for (Object item: (Object[])o) + result.addAll(functionObjectToList(item)); } // if o has no children, call toString() to serialize it into a string else { @@ -100,157 +33,6 @@ public static void functionObjectToList(Object o, List result) { result.add(o.toString()); } } - } - - private static Class getParamType(Term type) { - String typeStr = type.getValue(); - - switch (typeStr) { - // This is quite crude, based on https://www.w3.org/TR/xmlschema11-2/#built-in-datatypes - case "http://www.w3.org/2001/XMLSchema#any": - return Object.class; - case "http://www.w3.org/2001/XMLSchema#string": - return String.class; - case "http://www.w3.org/2001/XMLSchema#unsignedLong": - case "http://www.w3.org/2001/XMLSchema#long": - return Long.class; - case "http://www.w3.org/2001/XMLSchema#integer": - case "http://www.w3.org/2001/XMLSchema#int": - case "http://www.w3.org/2001/XMLSchema#short": - case "http://www.w3.org/2001/XMLSchema#byte": - case "http://www.w3.org/2001/XMLSchema#nonNegativeInteger": - case "http://www.w3.org/2001/XMLSchema#positiveInteger": - case "http://www.w3.org/2001/XMLSchema#unsignedInt": - case "http://www.w3.org/2001/XMLSchema#unsignedShort": - case "http://www.w3.org/2001/XMLSchema#unsignedByte": - case "http://www.w3.org/2001/XMLSchema#nonPositiveInteger": - case "http://www.w3.org/2001/XMLSchema#negativeInteger": - return Integer.class; - case "http://www.w3.org/2001/XMLSchema#boolean": - return Boolean.class; - case "http://www.w3.org/2001/XMLSchema#date": - // "Local" just means "without a time zone" - return LocalDate.class; - case "http://www.w3.org/2001/XMLSchema#dateTime": - // again "Local" means "without a time zone" - // (An xsd:dateTime actually has an OPTIONAL time zone, so there is a small semantic difference - // with java.time.LocalDateTime, this is a best effort.) 
- return LocalDateTime.class; - case "http://www.w3.org/2001/XMLSchema#dateTimeStamp": - return ZonedDateTime.class; - case "http://www.w3.org/2001/XMLSchema#dayTimeDuration": - case "http://www.w3.org/2001/XMLSchema#yearMonthDuration": - return Duration.class; - case "http://www.w3.org/2001/XMLSchema#gDay": - // TODO there is no java.time equivalent of xsd:day - // (There is java.time.DayOfWeek, but xsd:day would corresponds to java.time.DayOfMonth .) - throw new DateTimeException("There is no java.time equivalent of xsd:day. Crashing."); - case "http://www.w3.org/2001/XMLSchema#gMonth": - return Month.class; - case "http://www.w3.org/2001/XMLSchema#gMonthDay": - return MonthDay.class; - case "http://www.w3.org/2001/XMLSchema#gYear": - return Year.class; - case "http://www.w3.org/2001/XMLSchema#gYearMonth": - return YearMonth.class; - case "http://www.w3.org/2001/XMLSchema#decimal": - case "http://www.w3.org/2001/XMLSchema#double": - case "http://www.w3.org/2001/XMLSchema#float": - return Double.class; - case "http://www.w3.org/1999/02/22-rdf-syntax-ns#List": - return List.class; - default: - throw new Error("Couldn't derive type from " + type); - } - } - - private static Class getClass(File sourceFile, String className, String mime) throws IOException { - logger.info("Found class on path " + sourceFile.getCanonicalPath()); - - switch (mime) { - case "text/x-java-source": - return FunctionUtils.getClassFromJAVA(sourceFile, className); - case "application/java-archive": - return FunctionUtils.getClassFromJAR(sourceFile, className); - } - - return null; - } - - private static Class getClassFromJAVA(File sourceFile, String className) { - Class cls = null; - - // TODO let's not recompile every time - // Compile source file. - JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); - int res = compiler.run(null, null, null, sourceFile.getPath()); - - if (res != 0) { - return null; - } - - // Load and instantiate compiled class. 
- URLClassLoader classLoader = null; - try { - classLoader = URLClassLoader.newInstance(new URL[]{(new File(sourceFile.getParent())).toURI().toURL()}); - cls = Class.forName(className, true, classLoader); - } catch (MalformedURLException | ClassNotFoundException e) { - e.printStackTrace(); - } - - return cls; - } - - private static Class getClassFromJAR(File sourceFile, String className) { - Class cls = null; - - URLClassLoader child = null; - try { - child = URLClassLoader.newInstance(new URL[]{sourceFile.toURI().toURL()}); - cls = Class.forName(className, true, child); - } catch (MalformedURLException | ClassNotFoundException e) { - e.printStackTrace(); - } - - return cls; - } - - /** - * Retrieve triples of a store based on a predicate, taking into account deprecated FnO prefixed predicates - * @param store The triple store to retrieve the triples from - * @param s the subject - * @param functionTerm the unprefixed function term - * @param o the object - * @return the quads that are conform to the triple pattern fragment - */ - static List getQuadsByFunctionPrefix(QuadStore store, Term s, String functionTerm, Term o) { - List prefices = Arrays.asList(NAMESPACES.FNO_S, NAMESPACES.FNO, NAMESPACES.FNO_OLD); - return getQuadsByPrefix(store, s, functionTerm, o, prefices); - } - - /** - * Retrieve triples of a store based on a predicate, taking into account multiple prefixes - * @param store The triple store to retrieve the triples from - * @param s the subject - * @param pString the unprefixed predicate term - * @param o the object - * @param prefices the list of prefices on which to look for, in order of 'correctness' (all prefixes except for the first one are assumed deprecated) - * @return the quads that are conform to the triple pattern fragment - */ - private static List getQuadsByPrefix(QuadStore store, Term s, String pString, Term o, List prefices) { - String preferredPrefix = prefices.get(0); - Term realTerm; - List quads = new ArrayList<>(); - for (int i = 
0; i < prefices.size(); i++) { - realTerm = new NamedNode(prefices.get(i) + pString); - quads = store.getQuads(s, realTerm, o); - if (quads.size() > 0) { - if (i != 0) { - logger.warn(prefices.get(i) + "is a deprecated prefix, please use " + preferredPrefix); - } - return quads; - } - } - return quads; + return result; } } diff --git a/src/main/java/be/ugent/rml/functions/MultipleRecordsFunctionExecutor.java b/src/main/java/be/ugent/rml/functions/MultipleRecordsFunctionExecutor.java index 4cad8559..fed8cbe6 100644 --- a/src/main/java/be/ugent/rml/functions/MultipleRecordsFunctionExecutor.java +++ b/src/main/java/be/ugent/rml/functions/MultipleRecordsFunctionExecutor.java @@ -1,10 +1,16 @@ package be.ugent.rml.functions; -import be.ugent.rml.records.Record; +import be.ugent.idlab.knows.dataio.record.Record; import java.util.Map; public interface MultipleRecordsFunctionExecutor { Object execute(Map records) throws Exception; + + /** + * Returns {@code true} when a function is used in this executor that needs a special marker + * to indicate "End-of-File" (EOF). + */ + default boolean needsEOFMarker() {return false;} } diff --git a/src/main/java/be/ugent/rml/functions/SingleRecordFunctionExecutor.java b/src/main/java/be/ugent/rml/functions/SingleRecordFunctionExecutor.java index 8a08eea4..d15880f8 100644 --- a/src/main/java/be/ugent/rml/functions/SingleRecordFunctionExecutor.java +++ b/src/main/java/be/ugent/rml/functions/SingleRecordFunctionExecutor.java @@ -1,8 +1,17 @@ package be.ugent.rml.functions; -import be.ugent.rml.records.Record; + +import be.ugent.idlab.knows.dataio.record.Record; public interface SingleRecordFunctionExecutor { Object execute(Record record) throws Exception; + + /** + * Returns {@code true} when a function is used in this extractor that needs a special marker + * to indicate "End-of-File" (EOF). 
+ */ + default boolean needsEOFMarker() { + return false; + } } diff --git a/src/main/java/be/ugent/rml/functions/StaticMultipleRecordsFunctionExecutor.java b/src/main/java/be/ugent/rml/functions/StaticMultipleRecordsFunctionExecutor.java index c38fb573..35074fd0 100644 --- a/src/main/java/be/ugent/rml/functions/StaticMultipleRecordsFunctionExecutor.java +++ b/src/main/java/be/ugent/rml/functions/StaticMultipleRecordsFunctionExecutor.java @@ -1,23 +1,27 @@ package be.ugent.rml.functions; -import be.ugent.rml.records.Record; +import be.ugent.idlab.knows.dataio.record.Record; +import be.ugent.idlab.knows.functions.agent.Agent; +import be.ugent.idlab.knows.functions.agent.Arguments; -import java.util.HashMap; import java.util.Map; public class StaticMultipleRecordsFunctionExecutor implements MultipleRecordsFunctionExecutor { - private final FunctionModel functionModel; private final Map parameters; - public StaticMultipleRecordsFunctionExecutor(FunctionModel model, Map parameters) { - this.functionModel = model; + private final Agent functionAgent; + private final String functionId; + + public StaticMultipleRecordsFunctionExecutor(final Map parameters, Agent functionAgent, String functionId) { this.parameters = parameters; + this.functionAgent = functionAgent; + this.functionId = functionId; } @Override public Object execute(Map records) throws Exception { - Map filledInParameters = new HashMap<>(); + final Arguments functionArguments = new Arguments(); for (Map.Entry entry : this.parameters.entrySet()) { SingleRecordFunctionExecutor executor = (SingleRecordFunctionExecutor) entry.getValue()[1]; @@ -25,14 +29,10 @@ public Object execute(Map records) throws Exception { Object o = executor.execute(records.get(recordType)); - if (o != null) { - filledInParameters.put(entry.getKey(), o); - } else { - // TODO check whether key is actually optional! - filledInParameters.put(entry.getKey(), null); - } + // TODO check whether key is actually optional! 
+ functionArguments.add(entry.getKey(), o); } - return this.functionModel.execute(filledInParameters); + return functionAgent.execute(functionId, functionArguments); } } diff --git a/src/main/java/be/ugent/rml/functions/StaticSingleRecordFunctionExecutor.java b/src/main/java/be/ugent/rml/functions/StaticSingleRecordFunctionExecutor.java deleted file mode 100644 index 18ebec0e..00000000 --- a/src/main/java/be/ugent/rml/functions/StaticSingleRecordFunctionExecutor.java +++ /dev/null @@ -1,25 +0,0 @@ -package be.ugent.rml.functions; - -import be.ugent.rml.Template; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class StaticSingleRecordFunctionExecutor extends AbstractSingleRecordFunctionExecutor { - - public StaticSingleRecordFunctionExecutor(FunctionModel model, Map> parameters) { - HashMap parametersForOtherExecutor = new HashMap<>(); - - parameters.keySet().forEach(parameter -> { - List