http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/7727b165/extras/indexingExample/src/main/java/MongoRyaDirectExample.java ---------------------------------------------------------------------- diff --git a/extras/indexingExample/src/main/java/MongoRyaDirectExample.java b/extras/indexingExample/src/main/java/MongoRyaDirectExample.java index 8a2bac5..b741e6a 100644 --- a/extras/indexingExample/src/main/java/MongoRyaDirectExample.java +++ b/extras/indexingExample/src/main/java/MongoRyaDirectExample.java @@ -44,8 +44,8 @@ import org.openrdf.repository.sail.SailRepositoryConnection; import org.openrdf.sail.Sail; import mvm.rya.api.RdfCloudTripleStoreConfiguration; +import mvm.rya.indexing.GeoConstants; import mvm.rya.indexing.accumulo.ConfigUtils; -import mvm.rya.indexing.accumulo.geo.GeoConstants; import mvm.rya.mongodb.MongoDBRdfConfiguration; import mvm.rya.rdftriplestore.RdfCloudTripleStore; import mvm.rya.rdftriplestore.inference.InferenceEngineException; @@ -83,7 +83,7 @@ public class MongoRyaDirectExample { testAddAndDelete(conn); testAddAndDeleteNoContext(conn); testAddNamespaces(conn); - testAddPointAndWithinSearch(conn); +// testAddPointAndWithinSearch(conn); testAddAndFreeTextSearchWithPCJ(conn); // to test out inference, set inference to true in the conf if (USE_INFER){ @@ -100,60 +100,60 @@ public class MongoRyaDirectExample { } } - private static void testAddPointAndWithinSearch(SailRepositoryConnection conn) throws Exception { - - String update = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "INSERT DATA { " // - + " <urn:feature> a geo:Feature ; " // - + " geo:hasGeometry [ " // - + " a geo:Point ; " // - + " geo:asWKT \"Point(-77.03524 38.889468)\"^^geo:wktLiteral "// - + " ] . 
" // - + "}"; - - Update u = conn.prepareUpdate(QueryLanguage.SPARQL, update); - u.execute(); - - String queryString; - TupleQuery tupleQuery; - CountingResultHandler tupleHandler; - - // ring containing point - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt " // - + "{" // - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . "// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // - + "}";// - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() >= 1); // may see points from during previous runs - - // ring outside point - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt " // - + "{" // - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . 
"// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-77 39, -76 39, -76 38, -77 38, -77 39))\"^^geo:wktLiteral)) " // - + "}";// - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() == 0); - } +// private static void testAddPointAndWithinSearch(SailRepositoryConnection conn) throws Exception { +// +// String update = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "INSERT DATA { " // +// + " <urn:feature> a geo:Feature ; " // +// + " geo:hasGeometry [ " // +// + " a geo:Point ; " // +// + " geo:asWKT \"Point(-77.03524 38.889468)\"^^geo:wktLiteral "// +// + " ] . " // +// + "}"; +// +// Update u = conn.prepareUpdate(QueryLanguage.SPARQL, update); +// u.execute(); +// +// String queryString; +// TupleQuery tupleQuery; +// CountingResultHandler tupleHandler; +// +// // ring containing point +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt " // +// + "{" // +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . 
"// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // +// + "}";// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() >= 1); // may see points from during previous runs +// +// // ring outside point +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt " // +// + "{" // +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . "// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-77 39, -76 39, -76 38, -77 38, -77 39))\"^^geo:wktLiteral)) " // +// + "}";// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() == 0); +// } private static void closeQuietly(SailRepository repository) { if (repository != null) { @@ -272,7 +272,7 @@ public class MongoRyaDirectExample { conf.set(MongoDBRdfConfiguration.MONGO_DB_NAME, MONGO_DB); conf.set(MongoDBRdfConfiguration.MONGO_COLLECTION_PREFIX, MONGO_COLL_PREFIX); conf.set(ConfigUtils.GEO_PREDICATES_LIST, "http://www.opengis.net/ont/geosparql#asWKT"); - conf.set(ConfigUtils.USE_GEO, "true"); +// conf.set(ConfigUtils.USE_GEO, "true"); conf.set(ConfigUtils.USE_FREETEXT, "true"); conf.setTablePrefix(MONGO_COLL_PREFIX); conf.set(ConfigUtils.GEO_PREDICATES_LIST, GeoConstants.GEO_AS_WKT.stringValue());
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/7727b165/extras/indexingExample/src/main/java/RyaDirectExample.java ---------------------------------------------------------------------- diff --git a/extras/indexingExample/src/main/java/RyaDirectExample.java b/extras/indexingExample/src/main/java/RyaDirectExample.java index 03d23f9..8c609b4 100644 --- a/extras/indexingExample/src/main/java/RyaDirectExample.java +++ b/extras/indexingExample/src/main/java/RyaDirectExample.java @@ -17,10 +17,7 @@ * under the License. */ -import java.io.File; -import java.io.FileInputStream; import java.net.UnknownHostException; -import java.util.Collection; import java.util.List; import org.apache.accumulo.core.client.AccumuloException; @@ -36,7 +33,6 @@ import org.apache.log4j.Logger; import org.apache.rya.indexing.pcj.storage.PcjException; import org.apache.rya.indexing.pcj.storage.accumulo.PcjTables; import org.apache.rya.indexing.pcj.storage.accumulo.PcjVarOrderFactory; -import org.openrdf.model.Statement; import org.openrdf.model.URI; import org.openrdf.model.ValueFactory; import org.openrdf.model.impl.LiteralImpl; @@ -56,10 +52,6 @@ import org.openrdf.query.UpdateExecutionException; import org.openrdf.repository.RepositoryException; import org.openrdf.repository.sail.SailRepository; import org.openrdf.repository.sail.SailRepositoryConnection; -import org.openrdf.rio.RDFFormat; -import org.openrdf.rio.RDFParser; -import org.openrdf.rio.Rio; -import org.openrdf.rio.helpers.StatementCollector; import org.openrdf.sail.Sail; import org.openrdf.sail.SailException; @@ -68,8 +60,8 @@ import com.google.common.base.Optional; import mvm.rya.accumulo.AccumuloRdfConfiguration; import mvm.rya.api.RdfCloudTripleStoreConfiguration; import mvm.rya.api.persist.RyaDAOException; +import mvm.rya.indexing.GeoConstants; import mvm.rya.indexing.accumulo.ConfigUtils; -import mvm.rya.indexing.accumulo.geo.GeoConstants; import mvm.rya.indexing.external.PrecomputedJoinIndexerConfig; import 
mvm.rya.indexing.external.PrecomputedJoinIndexerConfig.PrecomputedJoinStorageType; import mvm.rya.rdftriplestore.inference.InferenceEngineException; @@ -114,18 +106,18 @@ public class RyaDirectExample { testAddAndTemporalSearchWithPCJ(conn); log.info("Running SAIL/SPARQL Example: Add and Free Text Search with PCJ"); testAddAndFreeTextSearchWithPCJ(conn); - log.info("Running SPARQL Example: Add Point and Geo Search with PCJ"); - testAddPointAndWithinSearchWithPCJ(conn); - log.info("Running SPARQL Example: Temporal, Freetext, and Geo Search"); - testTemporalFreeGeoSearch(conn); - log.info("Running SPARQL Example: Geo, Freetext, and PCJ Search"); - testGeoFreetextWithPCJSearch(conn); +// log.info("Running SPARQL Example: Add Point and Geo Search with PCJ"); +//// testAddPointAndWithinSearchWithPCJ(conn); +// log.info("Running SPARQL Example: Temporal, Freetext, and Geo Search"); +// testTemporalFreeGeoSearch(conn); +// log.info("Running SPARQL Example: Geo, Freetext, and PCJ Search"); +// testGeoFreetextWithPCJSearch(conn); log.info("Running SPARQL Example: Delete Temporal Data"); testDeleteTemporalData(conn); log.info("Running SPARQL Example: Delete Free Text Data"); testDeleteFreeTextData(conn); - log.info("Running SPARQL Example: Delete Geo Data"); - testDeleteGeoData(conn); +// log.info("Running SPARQL Example: Delete Geo Data"); +// testDeleteGeoData(conn); log.info("TIME: " + (System.currentTimeMillis() - start) / 1000.); } finally { @@ -161,7 +153,6 @@ public class RyaDirectExample { conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, USE_MOCK_INSTANCE); conf.set(ConfigUtils.USE_PCJ, "true"); - conf.set(ConfigUtils.USE_GEO, "true"); conf.set(ConfigUtils.USE_FREETEXT, "true"); conf.set(ConfigUtils.USE_TEMPORAL, "true"); conf.set(PrecomputedJoinIndexerConfig.PCJ_STORAGE_TYPE, PrecomputedJoinStorageType.ACCUMULO.name()); @@ -428,197 +419,197 @@ public class RyaDirectExample { Validate.isTrue(tupleHandler.getCount() == 1); } - private static void 
testAddPointAndWithinSearchWithPCJ( - final SailRepositoryConnection conn) throws Exception { - - final String update = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "INSERT DATA { " // - + " <urn:feature> a geo:Feature ; " // - + " geo:hasGeometry [ " // - + " a geo:Point ; " // - + " geo:asWKT \"Point(-77.03524 38.889468)\"^^geo:wktLiteral "// - + " ] . " // - + "}"; - - final Update u = conn.prepareUpdate(QueryLanguage.SPARQL, update); - u.execute(); - - String queryString; - TupleQuery tupleQuery; - CountingResultHandler tupleHandler; - - // point outside search ring - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt " // - + "{" // - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . "// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-77 39, -76 39, -76 38, -77 38, -77 39))\"^^geo:wktLiteral)) " // - + "}";// - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() == 0); - - // point inside search ring - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt ?e ?l ?o" // - + "{" // - + " ?feature a ?e . "// - + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// - + " ?e <uri:talksTo> ?o . "// - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . 
"// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // - + "}";// - - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() == 1); - - // point inside search ring with Pre-Computed Join - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt ?e ?l ?o" // - + "{" // - + " ?feature a ?e . "// - + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// - + " ?e <uri:talksTo> ?o . "// - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . "// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // - + "}";// - - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() >= 1); // may see points from - // during previous runs - - // point outside search ring with PCJ - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt ?e ?l ?o " // - + "{" // - + " ?feature a ?e . "// - + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// - + " ?e <uri:talksTo> ?o . "// - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . 
"// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-77 39, -76 39, -76 38, -77 38, -77 39))\"^^geo:wktLiteral)) " // - + "}";// - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() == 0); - - // point inside search ring with different Pre-Computed Join - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt ?e ?c ?l ?o " // - + "{" // - + " ?e a ?c . "// - + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// - + " ?e <uri:talksTo> ?o . "// - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . "// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // - + "}";// - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() == 1); - } - - private static void testTemporalFreeGeoSearch( - final SailRepositoryConnection conn) - throws MalformedQueryException, RepositoryException, - UpdateExecutionException, TupleQueryResultHandlerException, - QueryEvaluationException { - - String queryString; - TupleQuery tupleQuery; - CountingResultHandler tupleHandler; - - // ring containing point - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "PREFIX time: <http://www.w3.org/2006/time#> "// - + "PREFIX tempo: <tag:rya-rdf.org,2015:temporal#> "// - + "PREFIX fts: <http://rdf.useekm.com/fts#> "// - + "SELECT ?feature ?point ?wkt ?event 
?time ?person ?match" // - + "{" // - + " ?event a time:Instant . \n"// - + " ?event time:inXSDDateTime ?time . \n"// - + " FILTER(tempo:after(?time, '2001-01-01T01:01:03-08:00') ) \n"// after - // 3 - // seconds - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . "// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)). " // - + " ?person a <http://example.org/ontology/Person> . "// - + " ?person <http://www.w3.org/2000/01/rdf-schema#label> ?match . "// - + " FILTER(fts:text(?match, \"pal*\")) " // - + "}";// - - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() == 5); - - } - - private static void testGeoFreetextWithPCJSearch( - final SailRepositoryConnection conn) - throws MalformedQueryException, RepositoryException, - TupleQueryResultHandlerException, QueryEvaluationException { - // ring outside point - final String queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX fts: <http://rdf.useekm.com/fts#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt ?e ?c ?l ?o ?person ?match " // - + "{" // - + " ?person a <http://example.org/ontology/Person> . "// - + " ?person <http://www.w3.org/2000/01/rdf-schema#label> ?match . "// - + " FILTER(fts:text(?match, \"!alice & hose\")) " // - + " ?e a ?c . "// - + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// - + " ?e <uri:talksTo> ?o . "// - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . 
"// - + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // - + "}";// - final TupleQuery tupleQuery = conn.prepareTupleQuery( - QueryLanguage.SPARQL, queryString); - final CountingResultHandler tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() == 1); - } +// private static void testAddPointAndWithinSearchWithPCJ( +// final SailRepositoryConnection conn) throws Exception { +// +// final String update = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "INSERT DATA { " // +// + " <urn:feature> a geo:Feature ; " // +// + " geo:hasGeometry [ " // +// + " a geo:Point ; " // +// + " geo:asWKT \"Point(-77.03524 38.889468)\"^^geo:wktLiteral "// +// + " ] . " // +// + "}"; +// +// final Update u = conn.prepareUpdate(QueryLanguage.SPARQL, update); +// u.execute(); +// +// String queryString; +// TupleQuery tupleQuery; +// CountingResultHandler tupleHandler; +// +// // point outside search ring +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt " // +// + "{" // +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . 
"// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-77 39, -76 39, -76 38, -77 38, -77 39))\"^^geo:wktLiteral)) " // +// + "}";// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() == 0); +// +// // point inside search ring +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt ?e ?l ?o" // +// + "{" // +// + " ?feature a ?e . "// +// + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// +// + " ?e <uri:talksTo> ?o . "// +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . "// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // +// + "}";// +// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() == 1); +// +// // point inside search ring with Pre-Computed Join +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt ?e ?l ?o" // +// + "{" // +// + " ?feature a ?e . "// +// + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// +// + " ?e <uri:talksTo> ?o . "// +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . 
"// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // +// + "}";// +// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() >= 1); // may see points from +// // during previous runs +// +// // point outside search ring with PCJ +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt ?e ?l ?o " // +// + "{" // +// + " ?feature a ?e . "// +// + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// +// + " ?e <uri:talksTo> ?o . "// +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . "// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-77 39, -76 39, -76 38, -77 38, -77 39))\"^^geo:wktLiteral)) " // +// + "}";// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() == 0); +// +// // point inside search ring with different Pre-Computed Join +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt ?e ?c ?l ?o " // +// + "{" // +// + " ?e a ?c . "// +// + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// +// + " ?e <uri:talksTo> ?o . "// +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . 
"// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // +// + "}";// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() == 1); +// } +// +// private static void testTemporalFreeGeoSearch( +// final SailRepositoryConnection conn) +// throws MalformedQueryException, RepositoryException, +// UpdateExecutionException, TupleQueryResultHandlerException, +// QueryEvaluationException { +// +// String queryString; +// TupleQuery tupleQuery; +// CountingResultHandler tupleHandler; +// +// // ring containing point +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "PREFIX time: <http://www.w3.org/2006/time#> "// +// + "PREFIX tempo: <tag:rya-rdf.org,2015:temporal#> "// +// + "PREFIX fts: <http://rdf.useekm.com/fts#> "// +// + "SELECT ?feature ?point ?wkt ?event ?time ?person ?match" // +// + "{" // +// + " ?event a time:Instant . \n"// +// + " ?event time:inXSDDateTime ?time . \n"// +// + " FILTER(tempo:after(?time, '2001-01-01T01:01:03-08:00') ) \n"// after +// // 3 +// // seconds +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . "// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)). " // +// + " ?person a <http://example.org/ontology/Person> . "// +// + " ?person <http://www.w3.org/2000/01/rdf-schema#label> ?match . 
"// +// + " FILTER(fts:text(?match, \"pal*\")) " // +// + "}";// +// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() == 5); +// +// } +// +// private static void testGeoFreetextWithPCJSearch( +// final SailRepositoryConnection conn) +// throws MalformedQueryException, RepositoryException, +// TupleQueryResultHandlerException, QueryEvaluationException { +// // ring outside point +// final String queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX fts: <http://rdf.useekm.com/fts#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt ?e ?c ?l ?o ?person ?match " // +// + "{" // +// + " ?person a <http://example.org/ontology/Person> . "// +// + " ?person <http://www.w3.org/2000/01/rdf-schema#label> ?match . "// +// + " FILTER(fts:text(?match, \"!alice & hose\")) " // +// + " ?e a ?c . "// +// + " ?e <http://www.w3.org/2000/01/rdf-schema#label> ?l . "// +// + " ?e <uri:talksTo> ?o . "// +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . 
"// +// + " FILTER(geof:sfWithin(?wkt, \"POLYGON((-78 39, -77 39, -77 38, -78 38, -78 39))\"^^geo:wktLiteral)) " // +// + "}";// +// final TupleQuery tupleQuery = conn.prepareTupleQuery( +// QueryLanguage.SPARQL, queryString); +// final CountingResultHandler tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() == 1); +// } private static void testDeleteTemporalData( final SailRepositoryConnection conn) throws Exception { @@ -693,46 +684,46 @@ public class RyaDirectExample { Validate.isTrue(tupleHandler.getCount() == 0); } - private static void testDeleteGeoData(final SailRepositoryConnection conn) - throws Exception { - // Delete all stored points - final String sparqlDelete = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "DELETE {\n" // - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . "// - + "}\n" + "WHERE { \n" + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . "// - + "}";// - - final Update deleteUpdate = conn.prepareUpdate(QueryLanguage.SPARQL, - sparqlDelete); - deleteUpdate.execute(); - - String queryString; - TupleQuery tupleQuery; - CountingResultHandler tupleHandler; - - // Find all stored points - queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// - + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// - + "SELECT ?feature ?point ?wkt " // - + "{" // - + " ?feature a geo:Feature . "// - + " ?feature geo:hasGeometry ?point . "// - + " ?point a geo:Point . "// - + " ?point geo:asWKT ?wkt . 
"// - + "}";// - tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); - tupleHandler = new CountingResultHandler(); - tupleQuery.evaluate(tupleHandler); - log.info("Result count : " + tupleHandler.getCount()); - Validate.isTrue(tupleHandler.getCount() == 0); - } +// private static void testDeleteGeoData(final SailRepositoryConnection conn) +// throws Exception { +// // Delete all stored points +// final String sparqlDelete = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "DELETE {\n" // +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . "// +// + "}\n" + "WHERE { \n" + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . "// +// + "}";// +// +// final Update deleteUpdate = conn.prepareUpdate(QueryLanguage.SPARQL, +// sparqlDelete); +// deleteUpdate.execute(); +// +// String queryString; +// TupleQuery tupleQuery; +// CountingResultHandler tupleHandler; +// +// // Find all stored points +// queryString = "PREFIX geo: <http://www.opengis.net/ont/geosparql#> "// +// + "PREFIX geof: <http://www.opengis.net/def/function/geosparql/> "// +// + "SELECT ?feature ?point ?wkt " // +// + "{" // +// + " ?feature a geo:Feature . "// +// + " ?feature geo:hasGeometry ?point . "// +// + " ?point a geo:Point . "// +// + " ?point geo:asWKT ?wkt . 
"// +// + "}";// +// tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); +// tupleHandler = new CountingResultHandler(); +// tupleQuery.evaluate(tupleHandler); +// log.info("Result count : " + tupleHandler.getCount()); +// Validate.isTrue(tupleHandler.getCount() == 0); +// } private static void createPCJ(final Configuration conf) throws RepositoryException, AccumuloException, http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/7727b165/extras/pom.xml ---------------------------------------------------------------------- diff --git a/extras/pom.xml b/extras/pom.xml index 0ffeb7a..8131627 100644 --- a/extras/pom.xml +++ b/extras/pom.xml @@ -44,4 +44,13 @@ under the License. <module>rya.merger</module> <module>rya.benchmark</module> </modules> + + <profiles> + <profile> + <id>geoindexing</id> + <modules> + <module>rya.geoindexing</module> + </modules> + </profile> + </profiles> </project> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/7727b165/extras/rya.geoindexing/pom.xml ---------------------------------------------------------------------- diff --git a/extras/rya.geoindexing/pom.xml b/extras/rya.geoindexing/pom.xml new file mode 100644 index 0000000..a1721c8 --- /dev/null +++ b/extras/rya.geoindexing/pom.xml @@ -0,0 +1,224 @@ +<?xml version='1.0'?> + +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor + license agreements. See the NOTICE file distributed with this work for additional + information regarding copyright ownership. The ASF licenses this file to + you under the Apache License, Version 2.0 (the "License"); you may not use + this file except in compliance with the License. 
You may obtain a copy of + the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required + by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS + OF ANY KIND, either express or implied. See the License for the specific + language governing permissions and limitations under the License. --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.rya</groupId> + <artifactId>rya.extras</artifactId> + <version>3.2.10-SNAPSHOT</version> + </parent> + + <artifactId>rya.geoindexing</artifactId> + <name>Apache Rya Secondary Indexing</name> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <geotools.version>14.3</geotools.version> + </properties> + <repositories> + <repository> + <id>osgeo</id> + <name>Open Source Geospatial Foundation Repository</name> + <url>http://download.osgeo.org/webdav/geotools/</url> + </repository> + </repositories> + + <dependencies> + + <dependency> + <groupId>org.apache.accumulo</groupId> + <artifactId>accumulo-minicluster</artifactId> + <version>${accumulo.version}</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.sail</artifactId> + <exclusions> + <exclusion> + <artifactId>hsqldb</artifactId> + <groupId>hsqldb</groupId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.indexing</artifactId> + </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>accumulo.rya</artifactId> + </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>mongodb.rya</artifactId> + </dependency> + <dependency> + 
<groupId>org.apache.rya</groupId> + <artifactId>rya.prospector</artifactId> + </dependency> + + <!-- Free Text Indexing --> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-core</artifactId> + </dependency> + <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-analyzers</artifactId> + </dependency> + + <dependency> + <groupId>commons-codec</groupId> + <artifactId>commons-codec</artifactId> + </dependency> + + <!-- Geo Indexing --> + <dependency> + <groupId>org.locationtech.geomesa</groupId> + <artifactId>geomesa-accumulo-datastore</artifactId> + </dependency> + + <!-- PCJ Indexing --> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.indexing.pcj</artifactId> + </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>rya.pcj.fluo.api</artifactId> + </dependency> + + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-all</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>io.fluo</groupId> + <artifactId>fluo-mini</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.rya</groupId> + <artifactId>accumulo.rya</artifactId> + <version>${project.version}</version> + <type>test-jar</type> + </dependency> + <dependency> + <groupId>org.geotools.xsd</groupId> + <artifactId>gt-xsd-gml3</artifactId> + <version>${geotools.version}</version> + </dependency> + <dependency> + <groupId>org.geotools</groupId> + <artifactId>gt-api</artifactId> + <version>${geotools.version}</version> + </dependency> + <dependency> + <groupId>com.vividsolutions</groupId> + <artifactId>jts</artifactId> + <version>1.13</version> + </dependency> + </dependencies> + <build> + <pluginManagement> + <plugins> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <configuration> + <excludes> + <!-- 
RDF data Files --> + <exclude>**/*.ttl</exclude> + + <!-- Services Files --> + <exclude>**/resources/META-INF/services/**</exclude> + </excludes> + </configuration> + </plugin> + </plugins> + </pluginManagement> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <executions> + <execution> + <configuration> + <shadedArtifactAttached>true</shadedArtifactAttached> + <shadedClassifierName>map-reduce</shadedClassifierName> + <transformers> + <transformer + implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> + </transformers> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + </configuration> + </execution> + <execution> + <id>accumulo-server</id> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <shadedArtifactAttached>true</shadedArtifactAttached> + <shadedClassifierName>accumulo-server</shadedClassifierName> + <artifactSet> + <excludes> + <exclude>org.locationtech.geomesa:*</exclude> + <exclude>scala:*</exclude> + <exclude>org.apache.accumulo:*</exclude> + <exclude>org.apache.thrift:*</exclude> + <exclude>org.apache.hadoop:*</exclude> + <exclude>org.apache.zookeeper:*</exclude> + </excludes> + </artifactSet> + <transformers> + <transformer + implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> + </transformers> + <filters> + <filter> + <artifact>*:*</artifact> + <excludes> + <exclude>META-INF/*.SF</exclude> + <exclude>META-INF/*.DSA</exclude> + <exclude>META-INF/*.RSA</exclude> + </excludes> + </filter> + </filters> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> 
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/7727b165/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoEnabledFilterFunctionOptimizer.java ---------------------------------------------------------------------- diff --git a/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoEnabledFilterFunctionOptimizer.java b/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoEnabledFilterFunctionOptimizer.java new file mode 100644 index 0000000..45d1f1a --- /dev/null +++ b/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoEnabledFilterFunctionOptimizer.java @@ -0,0 +1,353 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + + +import java.io.IOException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.commons.lang.Validate; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.log4j.Logger; +import org.geotools.feature.SchemaException; +import org.openrdf.model.Resource; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.query.BindingSet; +import org.openrdf.query.Dataset; +import org.openrdf.query.algebra.And; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.FunctionCall; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.ValueConstant; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.evaluation.QueryOptimizer; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +import com.google.common.collect.Lists; + +import mvm.rya.accumulo.AccumuloRdfConfiguration; +import mvm.rya.indexing.IndexingFunctionRegistry.FUNCTION_TYPE; +import mvm.rya.indexing.accumulo.ConfigUtils; +import mvm.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer; +import mvm.rya.indexing.accumulo.freetext.FreeTextTupleSet; +import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer; +import mvm.rya.indexing.accumulo.geo.GeoTupleSet; +import 
mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer; +import mvm.rya.indexing.mongodb.freetext.MongoFreeTextIndexer; +import mvm.rya.indexing.mongodb.geo.MongoGeoIndexer; +import mvm.rya.indexing.mongodb.temporal.MongoTemporalIndexer; + +public class GeoEnabledFilterFunctionOptimizer implements QueryOptimizer, Configurable { + private static final Logger LOG = Logger.getLogger(GeoEnabledFilterFunctionOptimizer.class); + private final ValueFactory valueFactory = new ValueFactoryImpl(); + + private Configuration conf; + private GeoIndexer geoIndexer; + private FreeTextIndexer freeTextIndexer; + private TemporalIndexer temporalIndexer; + private boolean init = false; + + public GeoEnabledFilterFunctionOptimizer() { + } + + public GeoEnabledFilterFunctionOptimizer(final AccumuloRdfConfiguration conf) throws AccumuloException, AccumuloSecurityException, + TableNotFoundException, IOException, SchemaException, TableExistsException, NumberFormatException, UnknownHostException { + this.conf = conf; + init(); + } + + //setConf initializes GeoEnabledFilterFunctionOptimizer so reflection can be used + //to create the optimizer in RdfCloudTripleStoreConnection + @Override + public void setConf(final Configuration conf) { + this.conf = conf; + //reset the init.
+ init = false; + init(); + } + + private synchronized void init() { + if (!init) { + if (ConfigUtils.getUseMongo(conf)) { + geoIndexer = new MongoGeoIndexer(); + geoIndexer.setConf(conf); + freeTextIndexer = new MongoFreeTextIndexer(); + freeTextIndexer.setConf(conf); + temporalIndexer = new MongoTemporalIndexer(); + temporalIndexer.setConf(conf); + } else { + geoIndexer = new GeoMesaGeoIndexer(); + geoIndexer.setConf(conf); + freeTextIndexer = new AccumuloFreeTextIndexer(); + freeTextIndexer.setConf(conf); + temporalIndexer = new AccumuloTemporalIndexer(); + temporalIndexer.setConf(conf); + } + init = true; + } + } + + @Override + public void optimize(final TupleExpr tupleExpr, final Dataset dataset, final BindingSet bindings) { + // find variables used in property and resource based searches: + final SearchVarVisitor searchVars = new SearchVarVisitor(); + tupleExpr.visit(searchVars); + // rewrites for property searches: + processPropertySearches(tupleExpr, searchVars.searchProperties); + + } + + + + private void processPropertySearches(final TupleExpr tupleExpr, final Collection<Var> searchProperties) { + final MatchStatementVisitor matchStatements = new MatchStatementVisitor(searchProperties); + tupleExpr.visit(matchStatements); + for (final StatementPattern matchStatement: matchStatements.matchStatements) { + final Var subject = matchStatement.getSubjectVar(); + if (subject.hasValue() && !(subject.getValue() instanceof Resource)) { + throw new IllegalArgumentException("Query error: Found " + subject.getValue() + ", expected an URI or BNode"); + } + Validate.isTrue(subject.hasValue() || subject.getName() != null); + Validate.isTrue(!matchStatement.getObjectVar().hasValue() && matchStatement.getObjectVar().getName() != null); + buildQuery(tupleExpr, matchStatement); + } + } + + private void buildQuery(final TupleExpr tupleExpr, final StatementPattern matchStatement) { + //If our IndexerExpr (to be) is the rhs-child of LeftJoin, we can safely make that a Join: + 
// the IndexerExpr will (currently) not return results that can deliver unbound variables. + //This optimization should probably be generalized into a LeftJoin -> Join optimizer under certain conditions. Until that + // has been done, this code path at least takes care of queries generated by OpenSahara SparqTool that filter on OPTIONAL + // projections. E.g. summary~'full text search' (summary is optional). See #379 + if (matchStatement.getParentNode() instanceof LeftJoin) { + final LeftJoin leftJoin = (LeftJoin)matchStatement.getParentNode(); + if (leftJoin.getRightArg() == matchStatement && leftJoin.getCondition() == null) { + matchStatement.getParentNode().replaceWith(new Join(leftJoin.getLeftArg(), leftJoin.getRightArg())); + } + } + final FilterFunction fVisitor = new FilterFunction(matchStatement.getObjectVar().getName()); + tupleExpr.visit(fVisitor); + final List<IndexingExpr> results = Lists.newArrayList(); + for(int i = 0; i < fVisitor.func.size(); i++){ + results.add(new IndexingExpr(fVisitor.func.get(i), matchStatement, fVisitor.args.get(i))); + } + removeMatchedPattern(tupleExpr, matchStatement, new IndexerExprReplacer(results)); + } + + //find vars contained in filters + private static class SearchVarVisitor extends QueryModelVisitorBase<RuntimeException> { + private final Collection<Var> searchProperties = new ArrayList<Var>(); + + @Override + public void meet(final FunctionCall fn) { + final URI fun = new URIImpl(fn.getURI()); + final Var result = IndexingFunctionRegistry.getResultVarFromFunctionCall(fun, fn.getArgs()); + if (result != null && !searchProperties.contains(result)) { + searchProperties.add(result); + } + } + } + + //find StatementPatterns containing filter variables + private static class MatchStatementVisitor extends QueryModelVisitorBase<RuntimeException> { + private final Collection<Var> propertyVars; + private final Collection<Var> usedVars = new ArrayList<Var>(); + private final List<StatementPattern> matchStatements = new 
ArrayList<StatementPattern>(); + + public MatchStatementVisitor(final Collection<Var> propertyVars) { + this.propertyVars = propertyVars; + } + + @Override public void meet(final StatementPattern statement) { + final Var object = statement.getObjectVar(); + if (propertyVars.contains(object)) { + if (usedVars.contains(object)) { + throw new IllegalArgumentException("Illegal search, variable is used multiple times as object: " + object.getName()); + } else { + usedVars.add(object); + matchStatements.add(statement); + } + } + } + } + + private abstract class AbstractEnhanceVisitor extends QueryModelVisitorBase<RuntimeException> { + final String matchVar; + List<URI> func = Lists.newArrayList(); + List<Value[]> args = Lists.newArrayList(); + + public AbstractEnhanceVisitor(final String matchVar) { + this.matchVar = matchVar; + } + + protected void addFilter(final URI uri, final Value[] values) { + func.add(uri); + args.add(values); + } + } + + //create indexing expression for each filter matching var in filter StatementPattern + //replace old filter condition with true condition + private class FilterFunction extends AbstractEnhanceVisitor { + public FilterFunction(final String matchVar) { + super(matchVar); + } + + @Override + public void meet(final FunctionCall call) { + final URI fnUri = valueFactory.createURI(call.getURI()); + final Var resultVar = IndexingFunctionRegistry.getResultVarFromFunctionCall(fnUri, call.getArgs()); + if (resultVar != null && resultVar.getName().equals(matchVar)) { + addFilter(valueFactory.createURI(call.getURI()), extractArguments(matchVar, call)); + if (call.getParentNode() instanceof Filter || call.getParentNode() instanceof And || call.getParentNode() instanceof LeftJoin) { + call.replaceWith(new ValueConstant(valueFactory.createLiteral(true))); + } else { + throw new IllegalArgumentException("Query error: Found " + call + " as part of an expression that is too complex"); + } + } + } + + private Value[] extractArguments(final String 
matchName, final FunctionCall call) { + final Value args[] = new Value[call.getArgs().size() - 1]; + int argI = 0; + for (int i = 0; i != call.getArgs().size(); ++i) { + final ValueExpr arg = call.getArgs().get(i); + if (argI == i && arg instanceof Var && matchName.equals(((Var)arg).getName())) { + continue; + } + if (arg instanceof ValueConstant) { + args[argI] = ((ValueConstant)arg).getValue(); + } else if (arg instanceof Var && ((Var)arg).hasValue()) { + args[argI] = ((Var)arg).getValue(); + } else { + throw new IllegalArgumentException("Query error: Found " + arg + ", expected a Literal, BNode or URI"); + } + ++argI; + } + return args; + } + + @Override + public void meet(final Filter filter) { + //First visit children, then condition (reverse of default): + filter.getArg().visit(this); + filter.getCondition().visit(this); + } + } + + private void removeMatchedPattern(final TupleExpr tupleExpr, final StatementPattern pattern, final TupleExprReplacer replacer) { + final List<TupleExpr> indexTuples = replacer.createReplacement(pattern); + if (indexTuples.size() > 1) { + final VarExchangeVisitor vev = new VarExchangeVisitor(pattern); + tupleExpr.visit(vev); + Join join = new Join(indexTuples.remove(0), indexTuples.remove(0)); + for (final TupleExpr geo : indexTuples) { + join = new Join(join, geo); + } + pattern.replaceWith(join); + } else if (indexTuples.size() == 1) { + pattern.replaceWith(indexTuples.get(0)); + pattern.setParentNode(null); + } else { + throw new IllegalStateException("Must have at least one replacement for matched StatementPattern."); + } + } + + private interface TupleExprReplacer { + List<TupleExpr> createReplacement(TupleExpr org); + } + + //replace each filter pertinent StatementPattern with corresponding index expr + private class IndexerExprReplacer implements TupleExprReplacer { + private final List<IndexingExpr> indxExpr; + private final FUNCTION_TYPE type; + + public IndexerExprReplacer(final List<IndexingExpr> indxExpr) { + 
this.indxExpr = indxExpr; + final URI func = indxExpr.get(0).getFunction(); + type = IndexingFunctionRegistry.getFunctionType(func); + } + + @Override + public List<TupleExpr> createReplacement(final TupleExpr org) { + final List<TupleExpr> indexTuples = Lists.newArrayList(); + switch (type) { + case GEO: + for (final IndexingExpr indx : indxExpr) { + indexTuples.add(new GeoTupleSet(indx, geoIndexer)); + } + break; + case FREETEXT: + for (final IndexingExpr indx : indxExpr) { + indexTuples.add(new FreeTextTupleSet(indx, freeTextIndexer)); + } + break; + case TEMPORAL: + for (final IndexingExpr indx : indxExpr) { + indexTuples.add(new TemporalTupleSet(indx, temporalIndexer)); + } + break; + default: + throw new IllegalArgumentException("Incorrect type!"); + } + return indexTuples; + } + } + + private static class VarExchangeVisitor extends QueryModelVisitorBase<RuntimeException> { + private final StatementPattern exchangeVar; + public VarExchangeVisitor(final StatementPattern sp) { + exchangeVar = sp; + } + + @Override + public void meet(final Join node) { + final QueryModelNode lNode = node.getLeftArg(); + if (lNode instanceof StatementPattern) { + exchangeVar.replaceWith(lNode); + node.setLeftArg(exchangeVar); + } else { + super.meet(node); + } + } + } + + @Override + public Configuration getConf() { + return conf; + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/7727b165/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoIndexer.java ---------------------------------------------------------------------- diff --git a/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoIndexer.java b/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoIndexer.java new file mode 100644 index 0000000..40dfeec --- /dev/null +++ b/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoIndexer.java @@ -0,0 +1,185 @@ +package mvm.rya.indexing; + +import org.openrdf.model.Statement; +import org.openrdf.query.QueryEvaluationException; + +import 
com.vividsolutions.jts.geom.Geometry; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + + +import info.aduna.iteration.CloseableIteration; +import mvm.rya.api.persist.index.RyaSecondaryIndexer; + +/** + * A repository to store, index, and retrieve {@link Statement}s based on geospatial features. + */ +public interface GeoIndexer extends RyaSecondaryIndexer { + /** + * Returns statements that contain a geometry that is equal to the queried {@link Geometry} and meet the {@link StatementConstraints}. 
+ * + * <p> + * From Wikipedia (http://en.wikipedia.org/wiki/DE-9IM): + * <ul> + * <li> + * "Two geometries are topologically equal if their interiors intersect and no part of the interior or boundary of one geometry intersects the exterior of the other" + * <li>"A is equal to B if A is within B and A contains B" + * </ul> + * + * @param query + * the queried geometry + * @param contraints + * the {@link StatementConstraints} + * @return + */ + public abstract CloseableIteration<Statement, QueryEvaluationException> queryEquals(Geometry query, StatementConstraints contraints); + + /** + * Returns statements that contain a geometry that is disjoint to the queried {@link Geometry} and meet the {@link StatementConstraints}. + * + * <p> + * From Wikipedia (http://en.wikipedia.org/wiki/DE-9IM): + * <ul> + * <li>"A and B are disjoint if they have no point in common. They form a set of disconnected geometries." + * <li>"A and B are disjoint if A does not intersect B" + * </ul> + * + * @param query + * the queried geometry + * @param contraints + * the {@link StatementConstraints} + * @return + */ + public abstract CloseableIteration<Statement, QueryEvaluationException> queryDisjoint(Geometry query, StatementConstraints contraints); + + /** + * Returns statements that contain a geometry that Intersects the queried {@link Geometry} and meet the {@link StatementConstraints}. + * + * <p> + * From Wikipedia (http://en.wikipedia.org/wiki/DE-9IM): + * <ul> + * <li>"a intersects b: geometries a and b have at least one point in common." + * <li>"not Disjoint" + * </ul> + * + * + * @param query + * the queried geometry + * @param contraints + * the {@link StatementConstraints} + * @return + */ + public abstract CloseableIteration<Statement, QueryEvaluationException> queryIntersects(Geometry query, StatementConstraints contraints); + + /** + * Returns statements that contain a geometry that Touches the queried {@link Geometry} and meet the {@link StatementConstraints}. 
+ * + * <p> + * From Wikipedia (http://en.wikipedia.org/wiki/DE-9IM): + * <ul> + * <li>"a touches b, they have at least one boundary point in common, but no interior points." + * </ul> + * + * + * @param query + * the queried geometry + * @param contraints + * the {@link StatementConstraints} + * @return + */ + public abstract CloseableIteration<Statement, QueryEvaluationException> queryTouches(Geometry query, StatementConstraints contraints); + + /** + * Returns statements that contain a geometry that crosses the queried {@link Geometry} and meet the {@link StatementConstraints}. + * + * <p> + * From Wikipedia (http://en.wikipedia.org/wiki/DE-9IM): + * <ul> + * <li> + * "a crosses b, they have some but not all interior points in common (and the dimension of the intersection is less than that of at least one of them)." + * </ul> + * + * @param query + * the queried geometry + * @param contraints + * the {@link StatementConstraints} + * @return + */ + public abstract CloseableIteration<Statement, QueryEvaluationException> queryCrosses(Geometry query, StatementConstraints contraints); + + /** + * Returns statements that contain a geometry that is Within the queried {@link Geometry} and meet the {@link StatementConstraints}. + * + * <p> + * From Wikipedia (http://en.wikipedia.org/wiki/DE-9IM): + * <ul> + * <li>"a is within b, a lies in the interior of b" + * <li>Same as: "Contains(b,a)" + * </ul> + * + * + * @param query + * the queried geometry + * @param contraints + * the {@link StatementConstraints} + * @return + */ + public abstract CloseableIteration<Statement, QueryEvaluationException> queryWithin(Geometry query, StatementConstraints contraints); + + /** + * Returns statements that contain a geometry that Contains the queried {@link Geometry} and meet the {@link StatementConstraints}. + * + * <p> + * From Wikipedia (http://en.wikipedia.org/wiki/DE-9IM): + * <ul> + * <li>b is within a. Geometry b lies in the interior of a. 
Another definition: + * "a 'contains' b iff no points of b lie in the exterior of a, and at least one point of the interior of b lies in the interior of a" + * <li>Same: Within(b,a) + * </ul> + * + * + * @param query + * the queried geometry + * @param contraints + * the {@link StatementConstraints} + * @return + */ + public abstract CloseableIteration<Statement, QueryEvaluationException> queryContains(Geometry query, StatementConstraints contraints); + + /** + * Returns statements that contain a geometry that Overlaps the queried {@link Geometry} and meet the {@link StatementConstraints}. + * + * <p> + * From Wikipedia (http://en.wikipedia.org/wiki/DE-9IM): + * <ul> + * <li>a overlaps b, they have some but not all points in common, they have the same dimension, and the intersection of the + * interiors of the two geometries has the same dimension as the geometries themselves. + * </ul> + * + * + * @param query + * the queried geometry + * @param contraints + * the {@link StatementConstraints} + * @return + */ + public abstract CloseableIteration<Statement, QueryEvaluationException> queryOverlaps(Geometry query, StatementConstraints contraints); +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/7727b165/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoRyaSailFactory.java ---------------------------------------------------------------------- diff --git a/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoRyaSailFactory.java b/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoRyaSailFactory.java new file mode 100644 index 0000000..368d3c9 --- /dev/null +++ b/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/GeoRyaSailFactory.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package mvm.rya.indexing; + +import static java.util.Objects.requireNonNull; + +import java.net.UnknownHostException; +import java.util.Objects; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Connector; +import org.apache.hadoop.conf.Configuration; +import org.openrdf.sail.Sail; +import org.openrdf.sail.SailException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.mongodb.MongoClient; + +import mvm.rya.accumulo.AccumuloRdfConfiguration; +import mvm.rya.accumulo.AccumuloRyaDAO; +import mvm.rya.api.RdfCloudTripleStoreConfiguration; +import mvm.rya.api.instance.RyaDetailsRepository.RyaDetailsRepositoryException; +import mvm.rya.api.instance.RyaDetailsToConfiguration; +import mvm.rya.api.layout.TablePrefixLayoutStrategy; +import mvm.rya.api.persist.RyaDAO; +import mvm.rya.api.persist.RyaDAOException; +import mvm.rya.indexing.accumulo.ConfigUtils; +import mvm.rya.mongodb.MongoConnectorFactory; +import mvm.rya.mongodb.MongoDBRdfConfiguration; +import mvm.rya.mongodb.MongoDBRyaDAO; +import mvm.rya.mongodb.instance.MongoRyaInstanceDetailsRepository; +import mvm.rya.rdftriplestore.RdfCloudTripleStore; +import mvm.rya.rdftriplestore.inference.InferenceEngine; +import mvm.rya.rdftriplestore.inference.InferenceEngineException; +import 
mvm.rya.sail.config.RyaSailFactory; + +public class GeoRyaSailFactory { + private static final Logger LOG = LoggerFactory.getLogger(GeoRyaSailFactory.class); + + /** + * Creates an instance of {@link Sail} that is attached to a Rya instance. + * + * @param conf - Configures how the Sail object will be constructed. (not null) + * @return A {@link Sail} object that is backed by a Rya datastore. + * @throws SailException The object could not be created. + */ + public static Sail getInstance(final Configuration conf) throws AccumuloException, + AccumuloSecurityException, RyaDAOException, InferenceEngineException, SailException { + requireNonNull(conf); + return getRyaSail(conf); + } + + private static Sail getRyaSail(final Configuration config) throws InferenceEngineException, RyaDAOException, AccumuloException, AccumuloSecurityException, SailException { + final RdfCloudTripleStore store = new RdfCloudTripleStore(); + final RyaDAO<?> dao; + final RdfCloudTripleStoreConfiguration rdfConfig; + + final String user; + final String pswd; + // XXX Should(?) be MongoDBRdfConfiguration.MONGO_COLLECTION_PREFIX inside the if below. 
RYA-135 + final String ryaInstance = config.get(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX); + Objects.requireNonNull(ryaInstance, "RyaInstance or table prefix is missing from configuration."+RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX); + + if(ConfigUtils.getUseMongo(config)) { + final MongoDBRdfConfiguration mongoConfig = new MongoDBRdfConfiguration(config); + rdfConfig = mongoConfig; + final MongoClient client = MongoConnectorFactory.getMongoClient(config); + try { + final MongoRyaInstanceDetailsRepository ryaDetailsRepo = new MongoRyaInstanceDetailsRepository(client, mongoConfig.getCollectionName()); + RyaDetailsToConfiguration.addRyaDetailsToConfiguration(ryaDetailsRepo.getRyaInstanceDetails(), mongoConfig); + } catch (final RyaDetailsRepositoryException e) { + LOG.info("Instance does not have a rya details collection, skipping."); + } dao = getMongoDAO((MongoDBRdfConfiguration)rdfConfig, client); + } else { + rdfConfig = new AccumuloRdfConfiguration(config); + user = rdfConfig.get(ConfigUtils.CLOUDBASE_USER); + pswd = rdfConfig.get(ConfigUtils.CLOUDBASE_PASSWORD); + Objects.requireNonNull(user, "Accumulo user name is missing from configuration."+ConfigUtils.CLOUDBASE_USER); + Objects.requireNonNull(pswd, "Accumulo user password is missing from configuration."+ConfigUtils.CLOUDBASE_PASSWORD); + rdfConfig.setTableLayoutStrategy( new TablePrefixLayoutStrategy(ryaInstance) ); + RyaSailFactory.updateAccumuloConfig((AccumuloRdfConfiguration) rdfConfig, user, pswd, ryaInstance); + dao = getAccumuloDAO((AccumuloRdfConfiguration)rdfConfig); + } + store.setRyaDAO(dao); + rdfConfig.setTablePrefix(ryaInstance); + + if (rdfConfig.isInfer()){ + final InferenceEngine inferenceEngine = new InferenceEngine(); + inferenceEngine.setConf(rdfConfig); + inferenceEngine.setRyaDAO(dao); + inferenceEngine.init(); + store.setInferenceEngine(inferenceEngine); + } + + store.initialize(); + + return store; + } + + private static MongoDBRyaDAO getMongoDAO(final 
MongoDBRdfConfiguration config, final MongoClient client) throws RyaDAOException { + MongoDBRyaDAO dao = null; + OptionalConfigUtils.setIndexers(config); + if(client != null) { + dao = new MongoDBRyaDAO(config, client); + } else { + try { + dao = new MongoDBRyaDAO(config); + } catch (NumberFormatException | UnknownHostException e) { + throw new RyaDAOException("Unable to connect to mongo at the configured location.", e); + } + } + dao.init(); + return dao; + } + + private static AccumuloRyaDAO getAccumuloDAO(final AccumuloRdfConfiguration config) throws AccumuloException, AccumuloSecurityException, RyaDAOException { + final Connector connector = ConfigUtils.getConnector(config); + final AccumuloRyaDAO dao = new AccumuloRyaDAO(); + dao.setConnector(connector); + + OptionalConfigUtils.setIndexers(config); + config.setDisplayQueryPlan(true); + + dao.setConf(config); + dao.init(); + return dao; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/7727b165/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/OptionalConfigUtils.java ---------------------------------------------------------------------- diff --git a/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/OptionalConfigUtils.java b/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/OptionalConfigUtils.java new file mode 100644 index 0000000..089610b --- /dev/null +++ b/extras/rya.geoindexing/src/main/java/mvm/rya/indexing/OptionalConfigUtils.java @@ -0,0 +1,141 @@ +package mvm.rya.indexing; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import static java.util.Objects.requireNonNull; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.BatchScanner; +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.MultiTableBatchWriter; +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.ZooKeeperInstance; +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.log4j.Logger; +import org.openrdf.model.URI; +import org.openrdf.model.impl.URIImpl; + +import com.google.common.base.Optional; +import com.google.common.collect.Lists; + +import mvm.rya.accumulo.AccumuloRdfConfiguration; +import mvm.rya.api.RdfCloudTripleStoreConfiguration; +import mvm.rya.api.instance.RyaDetails; +import mvm.rya.indexing.GeoEnabledFilterFunctionOptimizer; +import mvm.rya.indexing.accumulo.ConfigUtils; +import 
mvm.rya.indexing.accumulo.entity.EntityCentricIndex; +import mvm.rya.indexing.accumulo.entity.EntityOptimizer; +import mvm.rya.indexing.accumulo.freetext.AccumuloFreeTextIndexer; +import mvm.rya.indexing.accumulo.freetext.LuceneTokenizer; +import mvm.rya.indexing.accumulo.freetext.Tokenizer; +import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer; +import mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer; +import mvm.rya.indexing.external.PrecomputedJoinIndexer; +import mvm.rya.indexing.mongodb.freetext.MongoFreeTextIndexer; +import mvm.rya.indexing.mongodb.geo.MongoGeoIndexer; +import mvm.rya.indexing.pcj.matching.PCJOptimizer; + +/** + * A set of configuration utils to read a Hadoop {@link Configuration} object and create Cloudbase/Accumulo objects. + * Soon will deprecate this class. Use installer for the set methods, use {@link RyaDetails} for the get methods. + * New code must separate parameters that are set at Rya install time from that which is specific to the client. + * Also Accumulo index tables are pushed down to the implementation and not configured in conf. 
+ */
+public class OptionalConfigUtils extends ConfigUtils {
+    private static final Logger logger = Logger.getLogger(OptionalConfigUtils.class);
+
+    /** Configuration key read by {@link #getGeoNumPartitions(Configuration)}. */
+    public static final String GEO_NUM_PARTITIONS = "sc.geo.numPartitions";
+
+    /** Configuration key of the boolean flag read by {@link #getUseGeo(Configuration)}. */
+    public static final String USE_GEO = "sc.use_geo";
+
+    // The following keys are only declared in this class; none of its methods read them.
+    public static final String USE_FREETEXT = "sc.use_freetext";
+    public static final String USE_TEMPORAL = "sc.use_temporal";
+    public static final String USE_ENTITY = "sc.use_entity";
+    public static final String USE_PCJ = "sc.use_pcj";
+    public static final String USE_OPTIMAL_PCJ = "sc.use.optimal.pcj";
+    public static final String USE_PCJ_UPDATER_INDEX = "sc.use.updater";
+
+    /** Configuration key read by {@link #getGeoPredicates(Configuration)}. */
+    public static final String GEO_PREDICATES_LIST = "sc.geo.predicates";
+
+    /**
+     * Looks up the predicates stored under {@link #GEO_PREDICATES_LIST} by delegating to the
+     * inherited {@code getPredicates(conf, key)} helper.
+     *
+     * @param conf the configuration to read
+     * @return whatever {@code getPredicates} produces for the geo predicate key
+     */
+    public static Set<URI> getGeoPredicates(final Configuration conf) {
+        return getPredicates(conf, GEO_PREDICATES_LIST);
+    }
+
+    /**
+     * Reads the integer stored under {@link #GEO_NUM_PARTITIONS}.
+     *
+     * @param conf the configuration to read
+     * @return the configured value, or {@code getNumPartitions(conf)} when the key is not set
+     */
+    public static int getGeoNumPartitions(final Configuration conf) {
+        return conf.getInt(GEO_NUM_PARTITIONS, getNumPartitions(conf));
+    }
+
+    /**
+     * Reads the boolean stored under {@link #USE_GEO}.
+     *
+     * @param conf the configuration to read
+     * @return the configured value, or {@code false} when the key is not set
+     */
+    public static boolean getUseGeo(final Configuration conf) {
+        return conf.getBoolean(USE_GEO, false);
+    }
+
+    /**
+     * Registers indexers and optimizers on {@code conf}.
+     * <p>
+     * {@link ConfigUtils#setIndexers} runs first; the indexer and optimizer lists it leaves in
+     * the configuration are then extended. When {@link #USE_GEO} is enabled the geo indexer
+     * matching the backing store ({@link MongoGeoIndexer} if {@code ConfigUtils.getUseMongo(conf)}
+     * is true, {@link GeoMesaGeoIndexer} otherwise) is appended, and
+     * {@code FilterFunctionOptimizer} is replaced by {@link GeoEnabledFilterFunctionOptimizer}.
+     * Both lists are written back to the configuration in every case.
+     *
+     * @param conf the configuration to read from and update in place
+     */
+    public static void setIndexers(final RdfCloudTripleStoreConfiguration conf) {
+        ConfigUtils.setIndexers(conf);
+
+        // Configuration#getStrings returns null (not an empty array) when the property is
+        // unset or empty, so the values must be copied null-safely instead of iterated directly.
+        final List<String> indexList = toMutableList(conf.getStrings(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS));
+        final List<String> optimizers = toMutableList(conf.getStrings(RdfCloudTripleStoreConfiguration.CONF_OPTIMIZERS));
+
+        if (getUseGeo(conf)) {
+            final String geoIndexer = ConfigUtils.getUseMongo(conf)
+                    ? MongoGeoIndexer.class.getName()
+                    : GeoMesaGeoIndexer.class.getName();
+            indexList.add(geoIndexer);
+
+            // A geo index is in use: swap the plain filter optimizer for the geo-enabled one.
+            optimizers.remove(FilterFunctionOptimizer.class.getName());
+            optimizers.add(GeoEnabledFilterFunctionOptimizer.class.getName());
+        }
+
+        conf.setStrings(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS, indexList.toArray(new String[0]));
+        conf.setStrings(RdfCloudTripleStoreConfiguration.CONF_OPTIMIZERS, optimizers.toArray(new String[0]));
+    }
+
+    /** Copies {@code values} into a new mutable list; a {@code null} array yields an empty list. */
+    private static List<String> toMutableList(final String[] values) {
+        return values == null ? Lists.<String>newArrayList() : Lists.newArrayList(values);
+    }
+}