Repository: incubator-rya Updated Branches: refs/heads/develop d5202aa52 -> 358c13b83
RYA-11 initial implementation of free text for mongo backend Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/c4d44eba Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/c4d44eba Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/c4d44eba Branch: refs/heads/develop Commit: c4d44ebae60c1ac3db5ea91d3f05ed9a588cf9b2 Parents: d5202aa Author: pujav65 <puja...@gmail.com> Authored: Wed Jan 13 11:21:52 2016 -0500 Committer: Aaron Mihalik <miha...@alum.mit.edu> Committed: Mon Mar 14 14:36:15 2016 -0400 ---------------------------------------------------------------------- .../rya/mongodb/MongoDBRdfConfiguration.java | 11 +- .../java/mvm/rya/mongodb/MongoDBRyaDAO.java | 7 + .../rya/indexing/FilterFunctionOptimizer.java | 5 +- .../mvm/rya/indexing/accumulo/ConfigUtils.java | 5 + .../indexing/mongodb/AbstractMongoIndexer.java | 3 +- .../indexing/mongodb/MongoFreeTextIndexer.java | 236 +++++++++++++++++++ .../rya/indexing/mongodb/MongoGeoIndexer.java | 23 +- .../mongodb/TextMongoDBStorageStrategy.java | 89 +++++++ .../mongo/MongoFreeTextIndexerTest.java | 201 ++++++++++++++++ .../src/main/java/MongoRyaDirectExample.java | 98 +++++++- 10 files changed, 661 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRdfConfiguration.java ---------------------------------------------------------------------- diff --git a/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRdfConfiguration.java b/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRdfConfiguration.java index 3c5a8ef..0c38337 100644 --- a/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRdfConfiguration.java +++ b/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRdfConfiguration.java @@ -29,6 +29,7 @@ import mvm.rya.api.persist.index.RyaSecondaryIndexer; import org.apache.hadoop.conf.Configuration; import com.google.common.collect.Lists; +import com.mongodb.MongoClient; public class MongoDBRdfConfiguration extends RdfCloudTripleStoreConfiguration { public static final String MONGO_INSTANCE = "mongo.db.instance"; @@ -40,6 +41,7 @@ public class MongoDBRdfConfiguration extends RdfCloudTripleStoreConfiguration { public static final String MONGO_USER_PASSWORD = "mongo.db.userpassword"; public static final String USE_TEST_MONGO = "mongo.db.test"; public static final String CONF_ADDITIONAL_INDEXERS = "ac.additional.indexers"; + private MongoClient mongoClient; public MongoDBRdfConfiguration() { super(); @@ -113,9 +115,14 @@ public class MongoDBRdfConfiguration extends RdfCloudTripleStoreConfiguration { public List<RyaSecondaryIndexer> getAdditionalIndexers() { return getInstances(CONF_ADDITIONAL_INDEXERS, RyaSecondaryIndexer.class); - } - + } + public void setMongoClient(MongoClient client){ + this.mongoClient = client; + } + public MongoClient getMongoClient() { + return mongoClient; + } } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRyaDAO.java ---------------------------------------------------------------------- diff --git a/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRyaDAO.java b/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRyaDAO.java index b9124e3..0ad96ff 100644 --- a/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRyaDAO.java +++ b/dao/mongodb.rya/src/main/java/mvm/rya/mongodb/MongoDBRyaDAO.java @@ -70,6 +70,7 @@ public class MongoDBRyaDAO implements RyaDAO<MongoDBRdfConfiguration>{ public MongoDBRyaDAO(MongoDBRdfConfiguration conf) throws RyaDAOException{ this.conf = conf; initConnection(); + conf.setMongoClient(mongoClient); init(); } @@ -77,6 +78,7 @@ public class MongoDBRyaDAO implements RyaDAO<MongoDBRdfConfiguration>{ public MongoDBRyaDAO(MongoDBRdfConfiguration conf, MongoClient mongoClient) throws RyaDAOException{ this.conf = conf; this.mongoClient = mongoClient; + conf.setMongoClient(mongoClient); init(); } @@ -87,6 +89,11 @@ public class MongoDBRyaDAO implements RyaDAO<MongoDBRdfConfiguration>{ public void setMongoClient(MongoClient mongoClient) { this.mongoClient = mongoClient; } + + + public MongoClient getMongoClient(){ + return mongoClient; + } public void setDB(DB db) { this.db = db; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/extras/indexing/src/main/java/mvm/rya/indexing/FilterFunctionOptimizer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/FilterFunctionOptimizer.java b/extras/indexing/src/main/java/mvm/rya/indexing/FilterFunctionOptimizer.java index 5d2678b..3425f3e 100644 --- a/extras/indexing/src/main/java/mvm/rya/indexing/FilterFunctionOptimizer.java +++ b/extras/indexing/src/main/java/mvm/rya/indexing/FilterFunctionOptimizer.java @@ -34,6 +34,7 @@ import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer; import mvm.rya.indexing.accumulo.geo.GeoTupleSet; import mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer; import mvm.rya.indexing.accumulo.temporal.TemporalTupleSet; +import mvm.rya.indexing.mongodb.MongoFreeTextIndexer; import mvm.rya.indexing.mongodb.MongoGeoIndexer; import org.apache.accumulo.core.client.AccumuloException; @@ -103,6 +104,8 @@ public class FilterFunctionOptimizer implements QueryOptimizer, Configurable { if (ConfigUtils.getUseMongo(conf)) { this.geoIndexer = new MongoGeoIndexer(); geoIndexer.setConf(conf); + this.freeTextIndexer = new MongoFreeTextIndexer(); + freeTextIndexer.setConf(conf); } else { this.geoIndexer = new GeoMesaGeoIndexer(); geoIndexer.setConf(conf); @@ -110,8 +113,8 @@ public class FilterFunctionOptimizer implements QueryOptimizer, Configurable { freeTextIndexer.setConf(conf); this.temporalIndexer = new AccumuloTemporalIndexer(); temporalIndexer.setConf(conf); - init = true; } + init = true; } } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java index ae16062..cf98078 100644 --- a/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java +++ b/extras/indexing/src/main/java/mvm/rya/indexing/accumulo/ConfigUtils.java @@ -36,6 +36,7 @@ import mvm.rya.indexing.accumulo.freetext.Tokenizer; import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer; import mvm.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer; import mvm.rya.indexing.external.PrecompJoinOptimizer; +import mvm.rya.indexing.mongodb.MongoFreeTextIndexer; import mvm.rya.indexing.mongodb.MongoGeoIndexer; import org.apache.accumulo.core.client.AccumuloException; @@ -381,6 +382,10 @@ public class ConfigUtils { indexList.add(MongoGeoIndexer.class.getName()); useFilterIndex = true; } + if (getUseFreeText(conf)) { + indexList.add(MongoFreeTextIndexer.class.getName()); + useFilterIndex = true; + } } else { if (getUsePCJ(conf) || getUseOptimalPCJ(conf)) { http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/AbstractMongoIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/AbstractMongoIndexer.java b/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/AbstractMongoIndexer.java index 0f8202c..5f2e700 100644 --- a/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/AbstractMongoIndexer.java +++ b/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/AbstractMongoIndexer.java @@ -44,8 +44,7 @@ public abstract class AbstractMongoIndexer implements RyaSecondaryIndexer { public Configuration getConf() { return null; } - - + @Override public String getTableName() { return null; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoFreeTextIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoFreeTextIndexer.java b/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoFreeTextIndexer.java new file mode 100644 index 0000000..3908eb3 --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoFreeTextIndexer.java @@ -0,0 +1,236 @@ +package mvm.rya.indexing.mongodb; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import info.aduna.iteration.CloseableIteration; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; + +import mvm.rya.api.domain.RyaStatement; +import mvm.rya.api.resolver.RdfToRyaConversions; +import mvm.rya.api.resolver.RyaToRdfConversions; +import mvm.rya.indexing.FreeTextIndexer; +import mvm.rya.indexing.StatementContraints; +import mvm.rya.indexing.accumulo.ConfigUtils; +import mvm.rya.mongodb.MongoDBRdfConfiguration; + +import org.apache.hadoop.conf.Configuration; +import org.apache.log4j.Logger; +import org.openrdf.model.Literal; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.query.QueryEvaluationException; + +import com.mongodb.DB; +import com.mongodb.DBCollection; +import com.mongodb.DBCursor; +import com.mongodb.DBObject; +import com.mongodb.MongoClient; +import com.mongodb.MongoCredential; +import com.mongodb.ServerAddress; + +import de.flapdoodle.embed.mongo.distribution.Version; +import de.flapdoodle.embed.mongo.tests.MongodForTestsFactory; + +public class MongoFreeTextIndexer extends AbstractMongoIndexer implements FreeTextIndexer { + + private static final Logger logger = Logger + .getLogger(MongoFreeTextIndexer.class); + + private TextMongoDBStorageStrategy storageStrategy; + private MongoClient mongoClient; + private DB db; + private DBCollection coll; + private Set<URI> predicates; + private Configuration conf; + private boolean isInit = false; + private String tableName = ""; + + private void init() throws IOException{ + boolean useMongoTest = conf.getBoolean(MongoDBRdfConfiguration.USE_TEST_MONGO, false); + if (useMongoTest) { + boolean initializedClient = false; + if (conf instanceof MongoDBRdfConfiguration){ + MongoDBRdfConfiguration castedConf = (MongoDBRdfConfiguration) conf; + if (castedConf.getMongoClient() != null){ + this.mongoClient = castedConf.getMongoClient(); + initializedClient = true; + } + } + if (!initializedClient){ + MongodForTestsFactory testsFactory = MongodForTestsFactory.with(Version.Main.PRODUCTION); + mongoClient = testsFactory.newMongo(); + int port = mongoClient.getServerAddressList().get(0).getPort(); + conf.set(MongoDBRdfConfiguration.MONGO_INSTANCE_PORT, Integer.toString(port)); + } } else { + ServerAddress server = new ServerAddress(conf.get(MongoDBRdfConfiguration.MONGO_INSTANCE), + Integer.valueOf(conf.get(MongoDBRdfConfiguration.MONGO_INSTANCE_PORT))); + if (conf.get(MongoDBRdfConfiguration.MONGO_USER) != null) { + MongoCredential cred = MongoCredential.createCredential( + conf.get(MongoDBRdfConfiguration.MONGO_USER), + conf.get(MongoDBRdfConfiguration.MONGO_DB_NAME), + conf.get(MongoDBRdfConfiguration.MONGO_USER_PASSWORD).toCharArray()); + mongoClient = new MongoClient(server, Arrays.asList(cred)); + } else { + mongoClient = new MongoClient(server); + } + } + predicates = ConfigUtils.getFreeTextPredicates(conf); + tableName = conf.get(MongoDBRdfConfiguration.MONGO_DB_NAME); + db = mongoClient.getDB(tableName); + coll = db.getCollection(conf.get(MongoDBRdfConfiguration.MONGO_COLLECTION_PREFIX, "rya")); + storageStrategy = new TextMongoDBStorageStrategy(); + storageStrategy.createIndices(coll); + } + + @Override + public String getTableName() { + return tableName; + } + + @Override + public Configuration getConf() { + return conf; + } + + // setConf initializes because index is created via reflection + @Override + public void setConf(Configuration conf) { + this.conf = conf; + if (!isInit) { + try { + init(); + isInit = true; + } catch (IOException e) { + logger.warn( + "Unable to initialize index. Throwing Runtime Exception. ", + e); + throw new RuntimeException(e); + } + } + } + + private void storeStatement(Statement statement) throws IOException { + // if this is a valid predicate and a valid geometry + boolean isValidPredicate = predicates.isEmpty() + || predicates.contains(statement.getPredicate()); + + if (isValidPredicate && (statement.getObject() instanceof Literal)) { + + // add it to the collection + try { + DBObject obj = storageStrategy.serialize(statement); + if (obj != null) { + DBObject query = storageStrategy + .getQuery(RdfToRyaConversions + .convertStatement(statement)); + coll.update(query, obj, true, false); + } + } catch (com.mongodb.MongoException.DuplicateKey exception) { + // ignore + } catch (com.mongodb.DuplicateKeyException exception) { + // ignore + } catch (Exception ex) { + // ignore single exceptions + ex.printStackTrace(); + } + } + } + + @Override + public void storeStatement(RyaStatement statement) throws IOException { + storeStatement(RyaToRdfConversions.convertStatement(statement)); + } + + + private CloseableIteration<Statement, QueryEvaluationException> getIteratorWrapper( + final DBObject query, final DBCollection coll, + final TextMongoDBStorageStrategy storageStrategy) { + + return new CloseableIteration<Statement, QueryEvaluationException>() { + + private DBCursor cursor = null; + + private DBCursor getIterator() throws QueryEvaluationException { + if (cursor == null) { + cursor = coll.find(query); + } + return cursor; + } + + @Override + public boolean hasNext() throws QueryEvaluationException { + return getIterator().hasNext(); + } + + @Override + public Statement next() throws QueryEvaluationException { + DBObject feature = getIterator().next(); + RyaStatement statement = storageStrategy + .deserializeDBObject(feature); + return RyaToRdfConversions.convertStatement(statement); + } + + @Override + public void remove() { + throw new UnsupportedOperationException( + "Remove not implemented"); + } + + @Override + public void close() throws QueryEvaluationException { + getIterator().close(); + } + }; + } + + @Override + public Set<URI> getIndexablePredicates() { + return predicates; + } + + @Override + public void flush() throws IOException { + // TODO Auto-generated method stub + + } + + @Override + public void close() throws IOException { + mongoClient.close(); + } + + @Override + public void deleteStatement(RyaStatement stmt) throws IOException { + DBObject obj = storageStrategy.getQuery(stmt); + coll.remove(obj); + } + + @Override + public CloseableIteration<Statement, QueryEvaluationException> queryText( + String query, StatementContraints contraints) throws IOException { + DBObject obj = storageStrategy.getQuery(contraints, query); + long count = coll.count(obj); + return getIteratorWrapper(obj, coll, storageStrategy); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoGeoIndexer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoGeoIndexer.java b/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoGeoIndexer.java index 199883f..9aab6cd 100644 --- a/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoGeoIndexer.java +++ b/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/MongoGeoIndexer.java @@ -68,16 +68,25 @@ public class MongoGeoIndexer extends AbstractMongoIndexer implements GeoIndexer private boolean isInit = false; private String tableName = ""; - private MongodForTestsFactory testsFactory; - private void init() throws NumberFormatException, IOException{ boolean useMongoTest = conf.getBoolean(MongoDBRdfConfiguration.USE_TEST_MONGO, false); if (useMongoTest) { - testsFactory = MongodForTestsFactory.with(Version.Main.PRODUCTION); - mongoClient = testsFactory.newMongo(); - int port = mongoClient.getServerAddressList().get(0).getPort(); - conf.set(MongoDBRdfConfiguration.MONGO_INSTANCE_PORT, Integer.toString(port)); - } else { + boolean initializedClient = false; + if (conf instanceof MongoDBRdfConfiguration){ + MongoDBRdfConfiguration castedConf = (MongoDBRdfConfiguration) conf; + if (castedConf.getMongoClient() != null){ + this.mongoClient = castedConf.getMongoClient(); + initializedClient = true; + } + } + if (!initializedClient){ + MongodForTestsFactory testsFactory = MongodForTestsFactory.with(Version.Main.PRODUCTION); + mongoClient = testsFactory.newMongo(); + int port = mongoClient.getServerAddressList().get(0).getPort(); + conf.set(MongoDBRdfConfiguration.MONGO_INSTANCE_PORT, Integer.toString(port)); + } + + } else { ServerAddress server = new ServerAddress(conf.get(MongoDBRdfConfiguration.MONGO_INSTANCE), Integer.valueOf(conf.get(MongoDBRdfConfiguration.MONGO_INSTANCE_PORT))); if (conf.get(MongoDBRdfConfiguration.MONGO_USER) != null) { http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/TextMongoDBStorageStrategy.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/TextMongoDBStorageStrategy.java b/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/TextMongoDBStorageStrategy.java new file mode 100644 index 0000000..fe8442a --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/mongodb/TextMongoDBStorageStrategy.java @@ -0,0 +1,89 @@ +package mvm.rya.indexing.mongodb; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +import java.util.Set; + +import mvm.rya.api.domain.RyaStatement; +import mvm.rya.api.resolver.RdfToRyaConversions; +import mvm.rya.indexing.StatementContraints; +import mvm.rya.mongodb.dao.SimpleMongoDBStorageStrategy; + +import org.openrdf.model.Statement; +import org.openrdf.model.URI; + +import com.mongodb.BasicDBList; +import com.mongodb.BasicDBObject; +import com.mongodb.DBCollection; +import com.mongodb.DBObject; +import com.mongodb.QueryBuilder; +import com.vividsolutions.jts.io.ParseException; + +public class TextMongoDBStorageStrategy extends SimpleMongoDBStorageStrategy{ + + private static final String text = "text"; + + public void createIndices(DBCollection coll){ + BasicDBObject basicDBObject = new BasicDBObject(); + basicDBObject.append(text, "text"); + coll.createIndex(basicDBObject); + + } + + public DBObject getQuery(StatementContraints contraints, String textquery) { + // TODO right now assuming the query string is a valid mongo query, this is + // not the case. Should reuse the Accumulo free text parsing of the query string to make sure + // that behavior is consistent across Accumulo vs Mongo + QueryBuilder queryBuilder = QueryBuilder.start().text(textquery); + + if (contraints.hasSubject()){ + queryBuilder.and(new BasicDBObject(SUBJECT, contraints.getSubject().toString())); + } + if (contraints.hasPredicates()){ + Set<URI> predicates = contraints.getPredicates(); + if (predicates.size() > 1){ + BasicDBList or = new BasicDBList(); + for (URI pred : predicates){ + DBObject currentPred = new BasicDBObject(PREDICATE, pred.toString()); + or.add(currentPred); + } + queryBuilder.or(or); + } + else if (!predicates.isEmpty()){ + queryBuilder.and(new BasicDBObject(PREDICATE, predicates.iterator().next().toString())); + } + } + if (contraints.hasContext()){ + queryBuilder.and(new BasicDBObject(CONTEXT, contraints.getContext().toString())); + } + return queryBuilder.get(); + } + + public DBObject serialize(Statement statement) throws ParseException{ + + RyaStatement ryaStatement = RdfToRyaConversions.convertStatement(statement); + BasicDBObject base = (BasicDBObject) super.serialize(ryaStatement); + base.append(text, ryaStatement.getObject().getData()); + return base; + + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/extras/indexing/src/test/java/mvm/rya/indexing/mongo/MongoFreeTextIndexerTest.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/test/java/mvm/rya/indexing/mongo/MongoFreeTextIndexerTest.java b/extras/indexing/src/test/java/mvm/rya/indexing/mongo/MongoFreeTextIndexerTest.java new file mode 100644 index 0000000..8d52b2d --- /dev/null +++ b/extras/indexing/src/test/java/mvm/rya/indexing/mongo/MongoFreeTextIndexerTest.java @@ -0,0 +1,201 @@ +package mvm.rya.indexing.mongo; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import info.aduna.iteration.CloseableIteration; + +import java.util.HashSet; +import java.util.Set; + +import junit.framework.Assert; +import mvm.rya.api.domain.RyaStatement; +import mvm.rya.api.domain.RyaType; +import mvm.rya.api.domain.RyaURI; +import mvm.rya.api.resolver.RdfToRyaConversions; +import mvm.rya.api.resolver.RyaToRdfConversions; +import mvm.rya.indexing.StatementContraints; +import mvm.rya.indexing.accumulo.ConfigUtils; +import mvm.rya.indexing.mongodb.MongoFreeTextIndexer; +import mvm.rya.mongodb.MongoDBRdfConfiguration; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Before; +import org.junit.Test; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.model.vocabulary.RDFS; + +import com.google.common.collect.Sets; + +public class MongoFreeTextIndexerTest { + private static final StatementContraints EMPTY_CONSTRAINTS = new StatementContraints(); + + Configuration conf; + + @Before + public void before() throws Exception { + conf = new Configuration(); + conf.set(ConfigUtils.USE_MONGO, "true"); + conf.set(MongoDBRdfConfiguration.USE_TEST_MONGO, "true"); + conf.set(MongoDBRdfConfiguration.MONGO_DB_NAME, "test"); + conf.set(MongoDBRdfConfiguration.MONGO_COLLECTION_PREFIX, "rya_"); + } + + @Test + public void testSearch() throws Exception { + try (MongoFreeTextIndexer f = new MongoFreeTextIndexer()) { + f.setConf(conf); + + ValueFactory vf = new ValueFactoryImpl(); + + URI subject = new URIImpl("foo:subj"); + URI predicate = RDFS.LABEL; + Value object = vf.createLiteral("this is a new hat"); + + URI context = new URIImpl("foo:context"); + + Statement statement = vf.createStatement(subject, predicate, object, context); + f.storeStatement(RdfToRyaConversions.convertStatement(statement)); + f.flush(); + + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("asdf", EMPTY_CONSTRAINTS))); + + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("new", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("hat new", EMPTY_CONSTRAINTS))); + } + } + + @Test + public void testDelete() throws Exception { + try (MongoFreeTextIndexer f = new MongoFreeTextIndexer()) { + f.setConf(conf); + + ValueFactory vf = new ValueFactoryImpl(); + + URI subject1 = new URIImpl("foo:subj"); + URI predicate1 = RDFS.LABEL; + Value object1 = vf.createLiteral("this is a new hat"); + + URI context1 = new URIImpl("foo:context"); + + Statement statement1 = vf.createStatement(subject1, predicate1, object1, context1); + f.storeStatement(RdfToRyaConversions.convertStatement(statement1)); + + URI subject2 = new URIImpl("foo:subject"); + URI predicate2 = RDFS.LABEL; + Value object2 = vf.createLiteral("Do you like my new hat?"); + + URI context2 = new URIImpl("foo:context"); + + Statement statement2 = vf.createStatement(subject2, predicate2, object2, context2); + f.storeStatement(RdfToRyaConversions.convertStatement(statement2)); + + f.flush(); + + + System.out.println("testDelete: BEFORE DELETE"); + + f.deleteStatement(RdfToRyaConversions.convertStatement(statement1)); + System.out.println("testDelete: AFTER FIRST DELETION"); +// Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("this is a new hat", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement2), getSet(f.queryText("Do you like my new hat?", EMPTY_CONSTRAINTS))); + + // Check that "new" didn't get deleted from the term table after "this is a new hat" + // was deleted since "new" is still in "Do you like my new hat?" + Assert.assertEquals(Sets.newHashSet(statement2), getSet(f.queryText("new", EMPTY_CONSTRAINTS))); + + f.deleteStatement(RdfToRyaConversions.convertStatement(statement2)); + System.out.println("testDelete: AFTER LAST DELETION"); + + System.out.println("testDelete: DONE"); + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("this is a new hat", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("Do you like my new hat?", EMPTY_CONSTRAINTS))); + } + } + + @Test + public void testRestrictPredicatesSearch() throws Exception { + conf.setStrings(ConfigUtils.FREETEXT_PREDICATES_LIST, "pred:1,pred:2"); + + try (MongoFreeTextIndexer f = new MongoFreeTextIndexer()) { + f.setConf(conf); + + // These should not be stored because they are not in the predicate list + f.storeStatement(new RyaStatement(new RyaURI("foo:subj1"), new RyaURI(RDFS.LABEL.toString()), new RyaType("invalid"))); + f.storeStatement(new RyaStatement(new RyaURI("foo:subj2"), new RyaURI(RDFS.COMMENT.toString()), new RyaType("invalid"))); + + RyaURI pred1 = new RyaURI("pred:1"); + RyaURI pred2 = new RyaURI("pred:2"); + + // These should be stored because they are in the predicate list + RyaStatement s3 = new RyaStatement(new RyaURI("foo:subj3"), pred1, new RyaType("valid")); + RyaStatement s4 = new RyaStatement(new RyaURI("foo:subj4"), pred2, new RyaType("valid")); + f.storeStatement(s3); + f.storeStatement(s4); + + // This should not be stored because the object is not a literal + f.storeStatement(new RyaStatement(new RyaURI("foo:subj5"), pred1, new RyaURI("in:validURI"))); + + f.flush(); + + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("invalid", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("in:validURI", EMPTY_CONSTRAINTS))); + + Set<Statement> actual = getSet(f.queryText("valid", EMPTY_CONSTRAINTS)); + Assert.assertEquals(2, actual.size()); + Assert.assertTrue(actual.contains(RyaToRdfConversions.convertStatement(s3))); + Assert.assertTrue(actual.contains(RyaToRdfConversions.convertStatement(s4))); + } + } + + @Test + public void testContextSearch() throws Exception { + try (MongoFreeTextIndexer f = new MongoFreeTextIndexer()) { + f.setConf(conf); + + ValueFactory vf = new ValueFactoryImpl(); + URI subject = new URIImpl("foo:subj"); + URI predicate = new URIImpl(RDFS.COMMENT.toString()); + Value object = vf.createLiteral("this is a new hat"); + URI context = new URIImpl("foo:context"); + + Statement statement = vf.createStatement(subject, predicate, object, context); + f.storeStatement(RdfToRyaConversions.convertStatement(statement)); + f.flush(); + + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("hat", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("hat", new StatementContraints().setContext(context)))); + Assert.assertEquals(Sets.newHashSet(), + getSet(f.queryText("hat", new StatementContraints().setContext(vf.createURI("foo:context2"))))); + } + } + + + private static <X> Set<X> getSet(CloseableIteration<X, ?> iter) throws Exception { + Set<X> set = new HashSet<X>(); + while (iter.hasNext()) { + set.add(iter.next()); + } + return set; + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/c4d44eba/extras/indexingExample/src/main/java/MongoRyaDirectExample.java ---------------------------------------------------------------------- diff --git a/extras/indexingExample/src/main/java/MongoRyaDirectExample.java b/extras/indexingExample/src/main/java/MongoRyaDirectExample.java index 4cdb504..7946afc 100644 --- a/extras/indexingExample/src/main/java/MongoRyaDirectExample.java +++ b/extras/indexingExample/src/main/java/MongoRyaDirectExample.java @@ -21,6 +21,7 @@ import java.util.List; import mvm.rya.api.RdfCloudTripleStoreConfiguration; import mvm.rya.indexing.accumulo.ConfigUtils; +import mvm.rya.indexing.accumulo.geo.GeoConstants; import mvm.rya.mongodb.MongoDBRdfConfiguration; import mvm.rya.sail.config.RyaSailFactory; @@ -28,6 +29,10 @@ import org.apache.commons.lang.Validate; import org.apache.hadoop.conf.Configuration; import org.apache.log4j.Logger; import org.openrdf.model.Namespace; +import org.openrdf.model.URI; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.vocabulary.RDF; +import org.openrdf.model.vocabulary.RDFS; import org.openrdf.query.BindingSet; import org.openrdf.query.MalformedQueryException; import org.openrdf.query.QueryEvaluationException; @@ -70,10 +75,11 @@ public class MongoRyaDirectExample { long start = System.currentTimeMillis(); log.info("Running SPARQL Example: Add and Delete"); - testAddAndDelete(conn); - testAddAndDeleteNoContext(conn); - testAddNamespaces(conn); - testAddPointAndWithinSearch(conn); +// testAddAndDelete(conn); +// testAddAndDeleteNoContext(conn); +// testAddNamespaces(conn); +// testAddPointAndWithinSearch(conn); + testAddAndFreeTextSearchWithPCJ(conn); log.info("TIME: " + (System.currentTimeMillis() - start) / 1000.); } finally { @@ -158,6 +164,86 @@ public class MongoRyaDirectExample { } } + private static void testAddAndFreeTextSearchWithPCJ(SailRepositoryConnection conn) throws Exception { + // add data to the repository using the SailRepository add methods + ValueFactory f = conn.getValueFactory(); + URI person = f.createURI("http://example.org/ontology/Person"); + + String uuid; + + uuid = "urn:people:alice"; + conn.add(f.createURI(uuid), RDF.TYPE, person); + conn.add(f.createURI(uuid), RDFS.LABEL, f.createLiteral("Alice Palace Hose", f.createURI("xsd:string"))); + + uuid = "urn:people:bobss"; + conn.add(f.createURI(uuid), RDF.TYPE, person); + conn.add(f.createURI(uuid), RDFS.LABEL, f.createLiteral("Bob Snob Hose", "en")); + + String queryString; + TupleQuery tupleQuery; + CountingResultHandler tupleHandler; + + // ///////////// search for alice + queryString = "PREFIX fts: <http://rdf.useekm.com/fts#> "// + + "SELECT ?person ?match ?e ?c ?l ?o " // + + "{" // + + " ?person <http://www.w3.org/2000/01/rdf-schema#label> ?match . "// + + " FILTER(fts:text(?match, \"Palace\")) " // + + "}";// + tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); + tupleHandler = new CountingResultHandler(); + tupleQuery.evaluate(tupleHandler); + log.info("Result count : " + tupleHandler.getCount()); + Validate.isTrue(tupleHandler.getCount() == 1); + + + // ///////////// search for alice and bob + queryString = "PREFIX fts: <http://rdf.useekm.com/fts#> "// + + "SELECT ?person ?match " // + + "{" // + + " ?person <http://www.w3.org/2000/01/rdf-schema#label> ?match . "// + + " ?person a <http://example.org/ontology/Person> . "// + + " FILTER(fts:text(?match, \"alice\")) " // + + "}";// + tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); + tupleHandler = new CountingResultHandler(); + tupleQuery.evaluate(tupleHandler); + log.info("Result count : " + tupleHandler.getCount()); + Validate.isTrue(tupleHandler.getCount() == 1); + + // ///////////// search for alice and bob + queryString = "PREFIX fts: <http://rdf.useekm.com/fts#> "// + + "SELECT ?person ?match " // + + "{" // + + " ?person a <http://example.org/ontology/Person> . "// + + " ?person <http://www.w3.org/2000/01/rdf-schema#label> ?match . "// + + " FILTER(fts:text(?match, \"alice\")) " // + + " FILTER(fts:text(?match, \"palace\")) " // + + "}";// + tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); + tupleHandler = new CountingResultHandler(); + tupleQuery.evaluate(tupleHandler); + log.info("Result count : " + tupleHandler.getCount()); + Validate.isTrue(tupleHandler.getCount() == 1); + + + // ///////////// search for bob + queryString = "PREFIX fts: <http://rdf.useekm.com/fts#> "// + + "SELECT ?person ?match ?e ?c ?l ?o " // + + "{" // + + " ?person a <http://example.org/ontology/Person> . "// + + " ?person <http://www.w3.org/2000/01/rdf-schema#label> ?match . "// + // this is an or query in mongo, a and query in accumulo + + " FILTER(fts:text(?match, \"alice hose\")) " // + + "}";// + + tupleQuery = conn.prepareTupleQuery(QueryLanguage.SPARQL, queryString); + tupleHandler = new CountingResultHandler(); + tupleQuery.evaluate(tupleHandler); + log.info("Result count : " + tupleHandler.getCount()); + Validate.isTrue(tupleHandler.getCount() == 2); + } + private static Configuration getConf() { Configuration conf = new Configuration(); @@ -167,8 +253,10 @@ public class MongoRyaDirectExample { conf.set(MongoDBRdfConfiguration.MONGO_COLLECTION_PREFIX, MONGO_COLL_PREFIX); conf.set(ConfigUtils.GEO_PREDICATES_LIST, "http://www.opengis.net/ont/geosparql#asWKT"); conf.set(ConfigUtils.USE_GEO, "true"); + conf.set(ConfigUtils.USE_FREETEXT, "true"); conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, MONGO_COLL_PREFIX); - + conf.set(ConfigUtils.GEO_PREDICATES_LIST, GeoConstants.GEO_AS_WKT.stringValue()); + conf.set(ConfigUtils.FREETEXT_PREDICATES_LIST, RDFS.LABEL.stringValue()); return conf; }