RYA-469 Added tests for Rya Streams join iterator bug using LUBM data.
Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/5adda982 Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/5adda982 Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/5adda982 Branch: refs/heads/master Commit: 5adda98205723a868f5e40912ff83a29fb9cb958 Parents: cb947a9 Author: eric.white <eric.wh...@parsons.com> Authored: Thu Mar 29 15:32:56 2018 -0400 Committer: Valiyil <puja.vali...@parsons.com> Committed: Fri Apr 6 11:59:36 2018 -0400 ---------------------------------------------------------------------- common/rya.api/pom.xml | 14 + .../org/apache/rya/api/utils/UuidUtils.java | 52 + .../org/apache/rya/api/utils/LubmQuery.java | 342 + extras/indexingExample/pom.xml | 5 + .../src/main/java/MongoRyaDirectExample.java | 46 + .../src/main/resources/lubm-1uni-withschema.nt | 100836 ++++++++++++++++ extras/rya.streams/client/pom.xml | 6 + .../AddQueryAndLoadStatementsStreamsIT.java | 206 + .../client/command/RunQueryCommandIT.java | 81 +- .../src/test/resources/lubm-1uni-withschema.nt | 100836 ++++++++++++++++ .../streams/kafka/interactor/KafkaRunQuery.java | 2 +- .../kafka/processors/ProcessorResult.java | 4 +- .../processors/join/JoinProcessorSupplier.java | 9 +- .../processors/join/KeyValueJoinStateStore.java | 62 +- .../kafka/topology/TopologyBuilderFactory.java | 13 + .../streams/kafka/topology/TopologyFactory.java | 6 +- pom.xml | 6 + .../apache/rya/test/kafka/KafkaTestUtil.java | 8 +- 18 files changed, 202515 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5adda982/common/rya.api/pom.xml ---------------------------------------------------------------------- diff --git a/common/rya.api/pom.xml b/common/rya.api/pom.xml index 368a5da..b3e79b3 100644 --- a/common/rya.api/pom.xml +++ b/common/rya.api/pom.xml @@ -110,4 +110,18 @@ under the 
License. </dependency> </dependencies> + <build> + <plugins> + <plugin> + <artifactId>maven-jar-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>test-jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> </project> http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5adda982/common/rya.api/src/main/java/org/apache/rya/api/utils/UuidUtils.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/utils/UuidUtils.java b/common/rya.api/src/main/java/org/apache/rya/api/utils/UuidUtils.java new file mode 100644 index 0000000..6787af5 --- /dev/null +++ b/common/rya.api/src/main/java/org/apache/rya/api/utils/UuidUtils.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.rya.api.utils; + +import static java.util.Objects.requireNonNull; + +import java.util.UUID; + +/** + * Utility methods and constants for {@link UUID}s. + */ +public final class UuidUtils { + /** + * The length of a {@link UUID} string. It is 32 characters long and has 4 + * hyphens for a total of 36 characters. 
+ */ + public static final int UUID_STRING_LENGTH = 36; + + /** + * Private constructor to prevent instantiation. + */ + private UuidUtils() { + } + + /** + * Extracts a UUID from the end of a string. + * @param text the string to extract from. (not {@code null}) + * @return the {@link UUID}. + */ + public static UUID extractUuidFromStringEnd(final String text) { + requireNonNull(text); + final String uuidString = text.substring(text.length() - UUID_STRING_LENGTH, text.length()); + final UUID uuid = UUID.fromString(uuidString); + return uuid; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5adda982/common/rya.api/src/test/java/org/apache/rya/api/utils/LubmQuery.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/java/org/apache/rya/api/utils/LubmQuery.java b/common/rya.api/src/test/java/org/apache/rya/api/utils/LubmQuery.java new file mode 100644 index 0000000..6ca9d38 --- /dev/null +++ b/common/rya.api/src/test/java/org/apache/rya/api/utils/LubmQuery.java @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.rya.api.utils; + +import static java.util.Objects.requireNonNull; + +import java.util.List; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; + +/** + * Holds common LUBM sample test queries. + */ +public enum LubmQuery { + /** + * This query bears large input and high selectivity. It queries about just + * one class and one property and does not assume any hierarchy information + * or inference. + */ + LUBM_QUERY_1( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X WHERE \n" + + "{ \n" + + " ?X rdf:type ub:GraduateStudent . \n" + + " ?X ub:takesCourse <http://www.Department0.University0.edu/GraduateCourse0> \n" + + "}", + true + ), + + /** + * This query increases in complexity: 3 classes and 3 properties are + * involved. Additionally, there is a triangular pattern of relationships + * between the objects involved. + */ + LUBM_QUERY_2( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X ?Y ?Z WHERE \n" + + "{ \n" + + " ?X rdf:type ub:GraduateStudent . \n" + + " ?Y rdf:type ub:University . \n" + + " ?Z rdf:type ub:Department . \n" + + " ?X ub:memberOf ?Z .\n" + + " ?Z ub:subOrganizationOf ?Y . \n" + + " ?X ub:undergraduateDegreeFrom ?Y \n" + + "}", + true + ), + + /** + * This query is similar to Query 1 but class Publication has a wide + * hierarchy. + */ + LUBM_QUERY_3( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Publication . \n" + + " ?X ub:publicationAuthor <http://www.Department0.University0.edu/AssistantProfessor0> \n" + + "}", + true + ), + + /** + * This query has small input and high selectivity. 
It assumes subClassOf + * relationship between Professor and its subclasses. Class Professor has a + * wide hierarchy. Another feature is that it queries about multiple + * properties of a single class. + */ + LUBM_QUERY_4( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X ?Y1 ?Y2 ?Y3 WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Professor . \n" + + " ?X ub:worksFor <http://www.Department0.University0.edu> . \n" + + " ?X ub:name ?Y1 . \n" + + " ?X ub:emailAddress ?Y2 . \n" + + " ?X ub:telephone ?Y3 \n" + + "}", + false + ), + + /** + * This query assumes subClassOf relationship between Person and its + * subclasses and subPropertyOf relationship between memberOf and its + * subproperties. Moreover, class Person features a deep and wide hierarchy. + */ + LUBM_QUERY_5( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Person . \n" + + " ?X ub:memberOf <http://www.Department0.University0.edu> \n" + + "}", + false + ), + + /** + * This query queries about only one class. But it assumes both the explicit + * subClassOf relationship between UndergraduateStudent and Student and the + * implicit one between GraduateStudent and Student. In addition, it has + * large input and low selectivity. + */ + LUBM_QUERY_6( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Student \n" + + "}", + false + ), + + /** + * This query is similar to Query 6 in terms of class Student but it + * increases in the number of classes and properties and its selectivity is + * high. 
+ */ + LUBM_QUERY_7( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X ?Y WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Student . \n" + + " ?Y rdf:type ub:Course . \n" + + " ?X ub:takesCourse ?Y . \n" + + " <http://www.Department0.University0.edu/AssociateProfessor0> ub:teacherOf ?Y \n" + + "}", + false + ), + + /** + * This query is more complex than Query 7 because it includes one more + * property. + */ + LUBM_QUERY_8( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X ?Y ?Z WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Student . \n" + + " ?Y rdf:type ub:Department .\n" + + " ?X ub:memberOf ?Y . \n" + + " ?Y ub:subOrganizationOf <http://www.University0.edu> . \n" + + " ?X ub:emailAddress ?Z \n" + + "}", + false + ), + + /** + * Besides the aforementioned features of class Student and the wide + * hierarchy of class Faculty, like Query 2, this query is characterized by + * the most classes and properties in the query set and there is a + * triangular pattern of relationships. + */ + LUBM_QUERY_9( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X ?Y ?Z WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Student . \n" + + " ?Y rdf:type ub:Faculty . \n" + + " ?Z rdf:type ub:Course . \n" + + " ?X ub:advisor ?Y . \n" + + " ?Y ub:teacherOf ?Z . \n" + + " ?X ub:takesCourse ?Z \n" + + "}", + false + ), + + /** + * This query differs from Query 6, 7, 8 and 9 in that it only requires the + * (implicit) subClassOf relationship between GraduateStudent and Student, + * i.e., subClassOf relationship between UndergraduateStudent and Student + * does not add to the results. 
+ */ + LUBM_QUERY_10( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Student . \n" + + " ?X ub:takesCourse <http://www.Department0.University0.edu/GraduateCourse0> \n" + + "}", + false + ), + + /** + * Query 11, 12 and 13 are intended to verify the presence of certain OWL + * reasoning capabilities in the system. In this query, property + * subOrganizationOf is defined as transitive. Since in the benchmark data, + * instances of ResearchGroup are stated as a sub-organization of a + * Department individual and the latter as a sub-organization of a University + * individual, inference about the subOrganizationOf relationship between + * instances of ResearchGroup and University is required to answer this + * query. Additionally, its input is small. + */ + LUBM_QUERY_11( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X WHERE \n" + + "{ \n" + + " ?X rdf:type ub:ResearchGroup . \n" + + " ?X ub:subOrganizationOf <http://www.University0.edu> \n" + + "}", + false + ), + + /** + * The benchmark data do not produce any instances of class Chair. Instead, + * each Department individual is linked to the chair professor of that + * department by property headOf. Hence this query requires realization, + * i.e., inference that that professor is an instance of class Chair because + * he or she is the head of a department. Input of this query is small as + * well. + */ + LUBM_QUERY_12( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X ?Y WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Chair . \n" + + " ?Y rdf:type ub:Department . \n" + + " ?X ub:worksFor ?Y . 
\n" + + " ?Y ub:subOrganizationOf <http://www.University0.edu> \n" + + "}", + false + ), + + /** + * Property hasAlumnus is defined in the benchmark ontology as the inverse + * of property degreeFrom, which has three subproperties: + * undergraduateDegreeFrom, mastersDegreeFrom, and doctoralDegreeFrom. The + * benchmark data state a person as an alumnus of a university using one of + * these three subproperties instead of hasAlumnus. Therefore, this query + * assumes subPropertyOf relationships between degreeFrom and its + * subproperties, and also requires inference about inverseOf. + */ + LUBM_QUERY_13( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X WHERE \n" + + "{ \n" + + " ?X rdf:type ub:Person . \n" + + " <http://www.University0.edu> ub:hasAlumnus ?X \n" + + "}", + false + ), + + /** + * This query is the simplest in the test set. This query represents those + * with large input and low selectivity and does not assume any hierarchy + * information or inference. + */ + LUBM_QUERY_14( + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n" + + "PREFIX ub: <http://swat.cse.lehigh.edu/onto/univ-bench.owl#> \n" + + "SELECT ?X WHERE \n" + + "{ \n" + + " ?X rdf:type ub:UndergraduateStudent \n" + + "}", + true + ); + + private String sparqlQuery; + private boolean isSupported; + + /** + * Creates a new {@link LubmQuery}. + * @param sparqlQuery the SPARQL query. (not {@code null}) + * @param isSupported {@code true} if the query type is supported by Rya. + * {@code false} otherwise. + */ + private LubmQuery(final String sparqlQuery, final boolean isSupported) { + this.sparqlQuery = requireNonNull(sparqlQuery); + this.isSupported = isSupported; + } + + /** + * @return the SPARQL query. + */ + public String getSparqlQuery() { + return sparqlQuery; + } + + /** + * @return {@code true} if the query type is supported by Rya. {@code false} + * otherwise. 
+ */ + public boolean isSupported() { + return isSupported; + } + + /** + * @return a {@link List} of every sample {@link LubmQuery} that is + * supported by Rya. + */ + public static List<LubmQuery> getSupportedQueries() { + final Builder<LubmQuery> builder = ImmutableList.builder(); + for (final LubmQuery lubmQuery : LubmQuery.values()) { + if (lubmQuery.isSupported()) { + builder.add(lubmQuery); + } + } + return builder.build(); + } + + /** + * @return a {@link List} of every sample {@link LubmQuery} that is NOT + * supported by Rya. + */ + public static List<LubmQuery> getUnsupportedQueries() { + final Builder<LubmQuery> builder = ImmutableList.builder(); + for (final LubmQuery lubmQuery : LubmQuery.values()) { + if (!lubmQuery.isSupported()) { + builder.add(lubmQuery); + } + } + return builder.build(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5adda982/extras/indexingExample/pom.xml ---------------------------------------------------------------------- diff --git a/extras/indexingExample/pom.xml b/extras/indexingExample/pom.xml index 43e61f0..bb9e07b 100644 --- a/extras/indexingExample/pom.xml +++ b/extras/indexingExample/pom.xml @@ -84,6 +84,11 @@ under the License. 
<artifactId>junit</artifactId> <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.api</artifactId> + <type>test-jar</type> + </dependency> </dependencies> <build> http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/5adda982/extras/indexingExample/src/main/java/MongoRyaDirectExample.java ---------------------------------------------------------------------- diff --git a/extras/indexingExample/src/main/java/MongoRyaDirectExample.java b/extras/indexingExample/src/main/java/MongoRyaDirectExample.java index 21e5dea..f2a01a5 100644 --- a/extras/indexingExample/src/main/java/MongoRyaDirectExample.java +++ b/extras/indexingExample/src/main/java/MongoRyaDirectExample.java @@ -17,7 +17,10 @@ * under the License. */ +import java.io.File; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.List; import org.apache.commons.lang.Validate; @@ -27,6 +30,7 @@ import org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.log4j.PatternLayout; +import org.apache.rya.api.utils.LubmQuery; import org.apache.rya.indexing.accumulo.ConfigUtils; import org.apache.rya.indexing.mongodb.MongoIndexingConfiguration; import org.apache.rya.indexing.mongodb.MongoIndexingConfiguration.MongoDBIndexingConfigBuilder; @@ -55,6 +59,8 @@ import org.openrdf.repository.RepositoryException; import org.openrdf.repository.RepositoryResult; import org.openrdf.repository.sail.SailRepository; import org.openrdf.repository.sail.SailRepositoryConnection; +import org.openrdf.rio.RDFFormat; +import org.openrdf.rio.RDFParseException; import org.openrdf.sail.Sail; import de.flapdoodle.embed.mongo.config.IMongoConfig; @@ -67,6 +73,10 @@ public class MongoRyaDirectExample { private static final boolean IS_DETAILED_LOGGING_ENABLED = false; + private static final boolean USE_LUBM_QUERIES = true; + private static final Path LUBM_FILE = 
Paths.get("src/main/resources/lubm-1uni-withschema.nt"); + private static final String LUBM_PREFIX = "http://swat.cse.lehigh.edu/onto/univ-bench.owl#"; + // // Connection configuration parameters // @@ -108,6 +118,10 @@ public class MongoRyaDirectExample { conn = repository.getConnection(); final long start = System.currentTimeMillis(); + if (USE_LUBM_QUERIES) { + log.info("Running LUBM Sample Queries"); + testLubmFile(conn); + } log.info("Running SPARQL Example: Add and Delete"); testAddAndDelete(conn); testAddAndDeleteNoContext(conn); @@ -805,6 +819,38 @@ public class MongoRyaDirectExample { Validate.isTrue(resultHandler.getCount() == 0); } + public static void testLubmFile(final SailRepositoryConnection conn) throws MalformedQueryException, RepositoryException, + UpdateExecutionException, QueryEvaluationException, TupleQueryResultHandlerException, RDFParseException, IOException { + + final String query = LubmQuery.LUBM_QUERY_14.getSparqlQuery(); +// "PREFIX lubm: <" + LUBM_PREFIX + "> \n" + +// "SELECT * WHERE \n" + +// "{ \n" + +// " ?graduateStudent a lubm:GraduateStudent . \n" + +// " ?underGradUniversity a lubm:University . \n" + +// " ?graduateStudent lubm:undergraduateDegreeFrom ?underGradUniversity . 
\n" + +// "}"; + + log.info("Query to be Performed on LUBM Data :\n\n" + query + "\n"); + + log.info("Adding LUBM Data from: " + LUBM_FILE.toAbsolutePath()); + addTriples(conn, LUBM_FILE.toFile(), RDFFormat.NTRIPLES); + + log.info("Executing LUBM Query"); + final CountingResultHandler resultHandler = new CountingResultHandler(); + final TupleQuery tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, query); + tupleQuery.evaluate(resultHandler); + log.info("Result count : " + resultHandler.getCount()); + + Validate.isTrue(resultHandler.getCount() > 0); + } + + private static void addTriples(final SailRepositoryConnection conn, final File triplesFile, final RDFFormat rdfFormat) throws RDFParseException, RepositoryException, IOException { + conn.begin(); + conn.add(triplesFile, "", rdfFormat); + conn.commit(); + } + private static class CountingResultHandler implements TupleQueryResultHandler { private int count = 0;