http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/entity/EntityOptimizerTest.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/entity/EntityOptimizerTest.java b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/entity/EntityOptimizerTest.java new file mode 100644 index 0000000..eb76b81 --- /dev/null +++ b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/entity/EntityOptimizerTest.java @@ -0,0 +1,1337 @@ +package mvm.rya.indexing.accumulo.entity; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import mvm.rya.accumulo.AccumuloRdfConfiguration; +import mvm.rya.api.RdfCloudTripleStoreConfiguration; +import mvm.rya.api.layout.TablePrefixLayoutStrategy; +import mvm.rya.api.persist.RdfEvalStatsDAO; +import mvm.rya.indexing.accumulo.ConfigUtils; +import mvm.rya.indexing.accumulo.entity.EntityOptimizer; +import mvm.rya.indexing.accumulo.entity.EntityTupleSet; +import mvm.rya.joinselect.AccumuloSelectivityEvalDAO; +import mvm.rya.prospector.service.ProspectorServiceEvalStatsDAO; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.BatchWriterConfig; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Value; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import 
org.openrdf.query.MalformedQueryException; +import org.openrdf.query.QueryEvaluationException; +import org.openrdf.query.TupleQueryResultHandlerException; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.evaluation.impl.FilterOptimizer; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; +import org.openrdf.query.parser.ParsedQuery; +import org.openrdf.query.parser.sparql.SPARQLParser; +import org.openrdf.repository.RepositoryException; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +public class EntityOptimizerTest { + + private static final String DELIM = "\u0000"; + private final byte[] EMPTY_BYTE = new byte[0]; + private final Value EMPTY_VAL = new Value(EMPTY_BYTE); + + private String q1 = ""// + + "SELECT ?h " // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + "}";// + + + private String q2 = ""// + + "SELECT ?h ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " ?m <uri:eats> <uri:chickens>. " // + + " ?m <uri:scratches> <uri:ears>. " // + + "}";// + + private String Q2 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?m <uri:eats> <uri:chickens>. " // + + " ?m <uri:scratches> <uri:ears>. " // + + "}";// + + private String q3 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " {?m <uri:eats> <uri:chickens>} OPTIONAL {?m <uri:scratches> <uri:ears>}. " // + + " {?m <uri:eats> <uri:kibble>. 
?m <uri:watches> <uri:television>.} UNION {?m <uri:rollsIn> <uri:mud>}. " // + + " ?l <uri:runsIn> <uri:field> ."// + + " ?l <uri:smells> <uri:butt> ."// + + " ?l <uri:eats> <uri:sticks> ."// + + "}";// + + private String Q4 = ""// + + "SELECT ?h ?l ?m" // + + "{" // + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?m <uri:scratches> <uri:ears>. " // + + " ?m <uri:eats> <uri:chickens>. " // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + "}";// + + private String q5 = ""// + + "SELECT ?h ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> ?m . "// + + " ?m <uri:eats> <uri:chickens>. " // + + " ?m <uri:scratches> <uri:ears>. " // + + "}";// + + + + private String q6 = ""// + + "SELECT ?h ?i ?l ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " {?m <uri:eats> <uri:chickens>} OPTIONAL {?m <uri:scratches> <uri:ears>}. " // + + " {?m <uri:eats> <uri:kibble>. ?m <uri:watches> ?i. ?i <uri:runsIn> <uri:field> .} " // + + " UNION {?m <uri:rollsIn> <uri:mud>. ?l <uri:smells> ?m . ?l <uri:eats> <uri:sticks> . }. " // + + "}";// + + + private String q7 = ""// + + "SELECT ?h ?m" // + + "{" // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:chickens> ."// + + " ?h <uri:barksAt> <uri:chickens> ."// + + " ?h <uri:peesOn> ?m . "// + + " ?m <uri:eats> <uri:chickens>. " // + + " ?m <uri:scratches> <uri:ears>. " // + + "}";// + + + private String q8 = ""// + + "SELECT ?h ?m" // + + "{" // + + " Filter(?h = \"Diego\") " // + + " Filter(?m = \"Rosie\") " // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:chickens> ."// + + " ?h <uri:barksAt> <uri:chickens> ."// + + " ?h <uri:peesOn> ?m . "// + + " ?m <uri:eats> <uri:chickens>. " // + + " ?m <uri:scratches> <uri:ears>. 
" // + + "}";// + + + + + private String q9 = ""// + + "SELECT ?h ?i ?l ?m" // + + "{" // + + " Filter(?h = \"Diego\") " // + + " Filter(?m = \"Rosie\") " // + + " ?h <http://www.w3.org/2000/01/rdf-schema#label> <uri:dog> ."// + + " ?h <uri:barksAt> <uri:cat> ."// + + " ?h <uri:peesOn> <uri:hydrant> . "// + + " {?m <uri:eats> <uri:chickens>} OPTIONAL {?m <uri:scratches> <uri:ears>}. " // + + " { Filter(?i = \"Bobo\"). ?m <uri:eats> <uri:kibble>. ?m <uri:watches> ?i. ?i <uri:runsIn> <uri:field> .} " // + + " UNION {?m <uri:rollsIn> <uri:mud>. ?l <uri:smells> ?m . ?l <uri:eats> <uri:sticks> . }. " // + + "}";// + + + + private Connector accCon; + AccumuloRdfConfiguration conf; + BatchWriterConfig config; + RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> res; + + + + @Before + public void init() throws RepositoryException, TupleQueryResultHandlerException, QueryEvaluationException, + MalformedQueryException, AccumuloException, AccumuloSecurityException, TableExistsException { + + + + accCon = new MockInstance("instance").getConnector("root", "".getBytes()); + + config = new BatchWriterConfig(); + config.setMaxMemory(1000); + config.setMaxLatency(1000, TimeUnit.SECONDS); + config.setMaxWriteThreads(10); + + if (accCon.tableOperations().exists("rya_prospects")) { + try { + accCon.tableOperations().delete("rya_prospects"); + } catch (TableNotFoundException e) { + e.printStackTrace(); + } + } + if (accCon.tableOperations().exists("rya_selectivity")) { + try { + accCon.tableOperations().delete("rya_selectivity"); + } catch (TableNotFoundException e) { + e.printStackTrace(); + } + } + + accCon.tableOperations().create("rya_prospects"); + accCon.tableOperations().create("rya_selectivity"); + + Configuration con = new Configuration(); + con.set(ConfigUtils.CLOUDBASE_AUTHS, "U"); + con.set(ConfigUtils.CLOUDBASE_INSTANCE, "instance"); + con.set(ConfigUtils.CLOUDBASE_USER, "root"); + con.set(ConfigUtils.CLOUDBASE_PASSWORD, ""); + conf = new AccumuloRdfConfiguration(con); + 
TablePrefixLayoutStrategy tps = new TablePrefixLayoutStrategy("rya_"); + conf.setTableLayoutStrategy(tps); + conf.set(ConfigUtils.USE_MOCK_INSTANCE, "true"); + + + res = new ProspectorServiceEvalStatsDAO(accCon, conf); + + } + + + @Test + public void testOptimizeQ1SamePriority() throws Exception { + + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject"); + Mutation m1, m2, m3, m4; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("1".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + + bw1.addMutations(mList); + bw1.close(); + + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m4.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + + for (String s : sList) { + + m1.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + + 
} + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m4); + bw2.addMutations(mList2); + bw2.close(); + + + TupleExpr te = getTupleExpr(q1); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + Assert.assertEquals(1, ccv.getCcNodes().size()); + + System.out.println(te); + + } + + + + + + @Test + public void testOptimizeQ2SamePriority() throws Exception { + + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject"); + Mutation m1, m2, m3, m4, m5, m6; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new 
Text("count"), new Text(""), new Value("1".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + + bw1.addMutations(mList); + bw1.close(); + +// Scanner scan = conn.createScanner("rya_prospects", new Authorizations()); +// scan.setRange(new Range()); +// +// for (Map.Entry<Key,Value> entry : scan) { +// System.out.println("Key row string is " + entry.getKey().getRow().toString()); +// System.out.println("Key is " + entry.getKey()); +// System.out.println("Value is " + (new String(entry.getValue().get()))); +// } + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m6.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + + for (String s : sList) { + + m1.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m4.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m5.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m4); + mList2.add(m5); + mList2.add(m6); + bw2.addMutations(mList2); + bw2.close(); + + + TupleExpr te = getTupleExpr(q2); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + Assert.assertEquals(2, ccv.getCcNodes().size()); + + + System.out.println(te); + + } + + + + + + + @Test + public void testOptimizeQ3SamePriority() throws Exception { + + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = 
accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + String s6 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:kibble"; + String s7 = "predicateobject" + DELIM + "uri:rollsIn" + DELIM + "uri:mud"; + String s8 = "predicateobject" + DELIM + "uri:runsIn" + DELIM + "uri:field"; + String s9 = "predicateobject" + DELIM + "uri:smells" + DELIM + "uri:butt"; + String s10 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:sticks"; + String s11 = "predicateobject" + DELIM + "uri:watches" + DELIM + "uri:television"; + + + + + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject"); + Mutation m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m6 = new Mutation(s6 + DELIM + "1"); + m6.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m7 = new Mutation(s7 + DELIM + "1"); + m7.put(new 
Text("count"), new Text(""), new Value("1".getBytes())); + m8 = new Mutation(s8 + DELIM + "1"); + m8.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m9 = new Mutation(s9 + DELIM + "1"); + m9.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m10 = new Mutation(s10 + DELIM + "1"); + m10.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m11 = new Mutation(s11 + DELIM + "1"); + m11.put(new Text("count"), new Text(""), new Value("1".getBytes())); + + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + mList.add(m6); + mList.add(m7); + mList.add(m8); + mList.add(m9); + mList.add(m10); + mList.add(m11); + + bw1.addMutations(mList); + bw1.close(); + + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(s6); + m7 = new Mutation(s7); + m8 = new Mutation(s8); + m9 = new Mutation(s9); + m10 = new Mutation(s10); + m11 = new Mutation(s11); + m12 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m12.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + + for (String s : sList) { + + m1.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m4.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m5.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m6.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m7.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m8.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m9.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m10.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m11.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + + + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + 
mList2.add(m4); + mList2.add(m5); + mList2.add(m6); + mList2.add(m7); + mList2.add(m8); + mList2.add(m9); + mList2.add(m10); + mList2.add(m11); + mList2.add(m12); + bw2.addMutations(mList2); + bw2.close(); + + + + TupleExpr te = getTupleExpr(q3); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + Assert.assertEquals(3, ccv.getCcNodes().size()); + + System.out.println(te); + + } + + + + + + + @Test + public void testOptimizeQ2DiffPriority() throws Exception { + + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicate" + DELIM + "uri:peesOn"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "objectsubject", "objectpredicate", "objectobject"); + Mutation m1, m2, m3, m4, m5, m6; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + 
m4.put(new Text("count"), new Text(""), new Value("3".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("3".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + + bw1.addMutations(mList); + bw1.close(); + + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m6.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + + for (String s : sList) { + + m1.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(3)), EMPTY_VAL); + m4.put(new Text(s), new Text(Integer.toString(3)), EMPTY_VAL); + m5.put(new Text(s), new Text(Integer.toString(3)), EMPTY_VAL); + + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m4); + mList2.add(m5); + mList2.add(m6); + bw2.addMutations(mList2); + bw2.close(); + + + TupleExpr te = getTupleExpr(q5); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + List<QueryModelNode> nodes = Lists.newArrayList(ccv.getCcNodes()); + + Assert.assertEquals(2, nodes.size()); + + for(QueryModelNode q: nodes) { + + if(((EntityTupleSet)q).getStarQuery().getNodes().size() == 2) { + Assert.assertEquals("h", ((EntityTupleSet)q).getStarQuery().getCommonVarName()); + } else if(((EntityTupleSet)q).getStarQuery().getNodes().size() == 3) { + Assert.assertEquals("m", ((EntityTupleSet)q).getStarQuery().getCommonVarName()); + } else { + Assert.assertTrue(false); + } + } + + + + + + System.out.println(te); + + } + + + + + + + @Test + public void testOptimizeQ2DiffPriority2() throws Exception { + + 
AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicate" + DELIM + "uri:peesOn"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "objectsubject", "objectpredicate", "objectobject"); + Mutation m1, m2, m3, m4, m5, m6; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("2".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + + bw1.addMutations(mList); + bw1.close(); + + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m6.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + + for (String s : sList) { + + 
m1.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m4.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m5.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m4); + mList2.add(m5); + mList2.add(m6); + bw2.addMutations(mList2); + bw2.close(); + + + TupleExpr te = getTupleExpr(q5); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + List<QueryModelNode> nodes = Lists.newArrayList(ccv.getCcNodes()); + + Assert.assertEquals(2, nodes.size()); + + + for(QueryModelNode q: nodes) { + + if(((EntityTupleSet)q).getStarQuery().getNodes().size() == 2) { + Assert.assertEquals("m", ((EntityTupleSet)q).getStarQuery().getCommonVarName()); + } else if(((EntityTupleSet)q).getStarQuery().getNodes().size() == 3) { + Assert.assertEquals("h", ((EntityTupleSet)q).getStarQuery().getCommonVarName()); + } else { + Assert.assertTrue(false); + } + } + + + System.out.println(te); + + } + + + + + + + @Test + public void testOptimizeQ6DiffPriority() throws Exception { + + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + 
"uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + String s6 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:kibble"; + String s7 = "predicateobject" + DELIM + "uri:rollsIn" + DELIM + "uri:mud"; + String s8 = "predicateobject" + DELIM + "uri:runsIn" + DELIM + "uri:field"; + String s9 = "predicate" + DELIM + "uri:smells" ; + String s10 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:sticks"; + String s11 = "predicate" + DELIM + "uri:watches"; + + + + + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject","objectsubject", "objectpredicate", "objectobject"); + Mutation m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m6 = new Mutation(s6 + DELIM + "1"); + m6.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m7 = new Mutation(s7 + DELIM + "1"); + m7.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m8 = new Mutation(s8 + DELIM + "1"); + m8.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m9 = new Mutation(s9 + DELIM + "1"); + m9.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m10 = new Mutation(s10 + DELIM + "1"); + m10.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m11 = new Mutation(s11 + DELIM + "1"); + m11.put(new Text("count"), new 
Text(""), new Value("2".getBytes())); + + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + mList.add(m6); + mList.add(m7); + mList.add(m8); + mList.add(m9); + mList.add(m10); + mList.add(m11); + + bw1.addMutations(mList); + bw1.close(); + + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(s6); + m7 = new Mutation(s7); + m8 = new Mutation(s8); + m9 = new Mutation(s9); + m10 = new Mutation(s10); + m11 = new Mutation(s11); + m12 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m12.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + + for (String s : sList) { + + m1.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m4.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m5.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m6.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m7.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m8.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m9.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m10.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m11.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + + + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m4); + mList2.add(m5); + mList2.add(m6); + mList2.add(m7); + mList2.add(m8); + mList2.add(m9); + mList2.add(m10); + mList2.add(m11); + mList2.add(m12); + bw2.addMutations(mList2); + bw2.close(); + + + + TupleExpr te = getTupleExpr(q6); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + 
List<QueryModelNode> nodes = Lists.newArrayList(ccv.getCcNodes()); + + Assert.assertEquals(3, nodes.size()); + List<String> cVarList = Lists.newArrayList(); + cVarList.add("i"); + cVarList.add("m"); + + for(QueryModelNode q: nodes) { + + if(((EntityTupleSet)q).getStarQuery().getNodes().size() == 2) { + String s = ((EntityTupleSet)q).getStarQuery().getCommonVarName(); + System.out.println("node is " + q + " and common var is " + s); + System.out.println("star query is " + ((EntityTupleSet)q).getStarQuery()); + Assert.assertTrue(cVarList.contains(s)); + cVarList.remove(s); + } else if(((EntityTupleSet)q).getStarQuery().getNodes().size() == 3) { + Assert.assertEquals("h", ((EntityTupleSet)q).getStarQuery().getCommonVarName()); + } else { + Assert.assertTrue(false); + } + } + + + System.out.println(te); + + } + + + + + + + + + + + + @Test + public void testOptimizeConstantPriority() throws Exception { + + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:chickens"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:chickens"; + String s3 = "predicate" + DELIM + "uri:peesOn"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "objectsubject", + "objectpredicate", "objectobject"); + Mutation m1, m2, m3, m4, m5, m6; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), 
new Value("2".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("2".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + + bw1.addMutations(mList); + bw1.close(); + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m6.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + for (String s : sList) { + + m1.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m4.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m5.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m4); + mList2.add(m5); + mList2.add(m6); + bw2.addMutations(mList2); + bw2.close(); + + TupleExpr te = getTupleExpr(q7); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + List<QueryModelNode> nodes = Lists.newArrayList(ccv.getCcNodes()); + System.out.println("Test 7 nodes are :" + nodes); + + Assert.assertEquals(2, nodes.size()); + + for (QueryModelNode q : nodes) { + + if (((EntityTupleSet) q).getStarQuery().getNodes().size() == 2) { + Assert.assertEquals("m", ((EntityTupleSet) q).getStarQuery().getCommonVarName()); 
+ } else if (((EntityTupleSet) q).getStarQuery().getNodes().size() == 3) { + Assert.assertEquals("uri:chickens", ((EntityTupleSet) q).getStarQuery().getCommonVarName()); + } else { + /* Only two- and three-pattern star queries are expected; report the size actually seen. */ + Assert.fail("Unexpected star query size: " + ((EntityTupleSet) q).getStarQuery().getNodes().size()); + } + } + + System.out.println(te); + + } + + + + + + + /** + * Writes count entries for five statement patterns to the rya_prospects table and + * join-selectivity entries plus a FullTableCardinality row to the rya_selectivity table, + * then runs FilterOptimizer followed by EntityOptimizer on query q8. + * Expects exactly two EntityTupleSet nodes: a two-pattern star query whose common + * var name is "m" and a three-pattern star query whose common var name is "uri:chickens". + */ + @Test + public void testOptimizeFilters() throws Exception { + + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:chickens"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:chickens"; + String s3 = "predicate" + DELIM + "uri:peesOn"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "objectsubject", + "objectpredicate", "objectobject"); + Mutation m1, m2, m3, m4, m5, m6; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("2".getBytes())); + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + + bw1.addMutations(mList); + 
bw1.close(); + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m6.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + for (String s : sList) { + + m1.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m4.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m5.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + mList2.add(m4); + mList2.add(m5); + mList2.add(m6); + bw2.addMutations(mList2); + bw2.close(); + + TupleExpr te = getTupleExpr(q8); + (new FilterOptimizer()).optimize(te,null,null); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + List<QueryModelNode> nodes = Lists.newArrayList(ccv.getCcNodes()); + System.out.println("Test 8 nodes are :" + nodes); + + Assert.assertEquals(2, nodes.size()); + + for (QueryModelNode q : nodes) { + + if (((EntityTupleSet) q).getStarQuery().getNodes().size() == 2) { + Assert.assertEquals("m", ((EntityTupleSet) q).getStarQuery().getCommonVarName()); + } else if (((EntityTupleSet) q).getStarQuery().getNodes().size() == 3) { + Assert.assertEquals("uri:chickens", ((EntityTupleSet) q).getStarQuery().getCommonVarName()); + } else { + /* Only two- and three-pattern star queries are expected; report the size actually seen. */ + Assert.fail("Unexpected star query size: " + ((EntityTupleSet) q).getStarQuery().getNodes().size()); + } + } + + System.out.println(te); + + } + + + + + + + /** + * Same flow as testOptimizeFilters, but for query q9 with eleven prospect and + * selectivity entries. Expects three EntityTupleSet nodes: one three-pattern star + * query whose common var name is "h", and two-pattern star queries whose common + * var names are taken from "i" and "m", each matched at most once. + */ + @Test + public void testOptimizeFilter2() throws Exception { + + AccumuloSelectivityEvalDAO accc = new AccumuloSelectivityEvalDAO(); + accc.setConf(conf); + accc.setConnector(accCon); + accc.setRdfEvalDAO(res); + accc.init(); + + BatchWriter bw1 = 
accCon.createBatchWriter("rya_prospects", config); + BatchWriter bw2 = accCon.createBatchWriter("rya_selectivity", config); + + String s1 = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog"; + String s2 = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat"; + String s3 = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant"; + String s5 = "predicateobject" + DELIM + "uri:scratches" + DELIM + "uri:ears"; + String s4 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:chickens"; + String s6 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:kibble"; + String s7 = "predicateobject" + DELIM + "uri:rollsIn" + DELIM + "uri:mud"; + String s8 = "predicateobject" + DELIM + "uri:runsIn" + DELIM + "uri:field"; + String s9 = "predicate" + DELIM + "uri:smells" ; + String s10 = "predicateobject" + DELIM + "uri:eats" + DELIM + "uri:sticks"; + String s11 = "predicate" + DELIM + "uri:watches"; + + + + + List<Mutation> mList = new ArrayList<Mutation>(); + List<Mutation> mList2 = new ArrayList<Mutation>(); + List<String> sList = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject","objectsubject", "objectpredicate", "objectobject"); + Mutation m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12; + + m1 = new Mutation(s1 + DELIM + "1"); + m1.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m2 = new Mutation(s2 + DELIM + "1"); + m2.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m3 = new Mutation(s3 + DELIM + "1"); + m3.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m4 = new Mutation(s4 + DELIM + "1"); + m4.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m5 = new Mutation(s5 + DELIM + "1"); + m5.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m6 = new Mutation(s6 + DELIM + "1"); + m6.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m7 = new Mutation(s7 + DELIM + "1"); + m7.put(new 
Text("count"), new Text(""), new Value("2".getBytes())); + m8 = new Mutation(s8 + DELIM + "1"); + m8.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m9 = new Mutation(s9 + DELIM + "1"); + m9.put(new Text("count"), new Text(""), new Value("2".getBytes())); + m10 = new Mutation(s10 + DELIM + "1"); + m10.put(new Text("count"), new Text(""), new Value("1".getBytes())); + m11 = new Mutation(s11 + DELIM + "1"); + m11.put(new Text("count"), new Text(""), new Value("2".getBytes())); + + mList.add(m1); + mList.add(m2); + mList.add(m3); + mList.add(m4); + mList.add(m5); + mList.add(m6); + mList.add(m7); + mList.add(m8); + mList.add(m9); + mList.add(m10); + mList.add(m11); + + bw1.addMutations(mList); + bw1.close(); + + + m1 = new Mutation(s1); + m2 = new Mutation(s2); + m3 = new Mutation(s3); + m4 = new Mutation(s4); + m5 = new Mutation(s5); + m6 = new Mutation(s6); + m7 = new Mutation(s7); + m8 = new Mutation(s8); + m9 = new Mutation(s9); + m10 = new Mutation(s10); + m11 = new Mutation(s11); + m12 = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality")); + m12.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL); + + + for (String s : sList) { + + m1.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m2.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m3.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m4.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m5.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m6.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m7.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m8.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m9.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + m10.put(new Text(s), new Text(Integer.toString(1)), EMPTY_VAL); + m11.put(new Text(s), new Text(Integer.toString(2)), EMPTY_VAL); + + + } + mList2.add(m1); + mList2.add(m2); + mList2.add(m3); + 
mList2.add(m4); + mList2.add(m5); + mList2.add(m6); + mList2.add(m7); + mList2.add(m8); + mList2.add(m9); + mList2.add(m10); + mList2.add(m11); + mList2.add(m12); + bw2.addMutations(mList2); + bw2.close(); + + + + TupleExpr te = getTupleExpr(q9); + System.out.println(te); + (new FilterOptimizer()).optimize(te,null,null); + + EntityOptimizer cco = new EntityOptimizer(accc); + System.out.println("Originial query is " + te); + cco.optimize(te, null, null); + + EntityCentricVisitor ccv = new EntityCentricVisitor(); + te.visit(ccv); + + List<QueryModelNode> nodes = Lists.newArrayList(ccv.getCcNodes()); + + Assert.assertEquals(3, nodes.size()); + List<String> cVarList = Lists.newArrayList(); + cVarList.add("i"); + cVarList.add("m"); + + for(QueryModelNode q: nodes) { + + if(((EntityTupleSet)q).getStarQuery().getNodes().size() == 2) { + String s = ((EntityTupleSet)q).getStarQuery().getCommonVarName(); + System.out.println("node is " + q + " and common var is " + s); + System.out.println("star query is " + ((EntityTupleSet)q).getStarQuery()); + Assert.assertTrue(cVarList.contains(s)); + cVarList.remove(s); + } else if(((EntityTupleSet)q).getStarQuery().getNodes().size() == 3) { + Assert.assertEquals("h", ((EntityTupleSet)q).getStarQuery().getCommonVarName()); + } else { + Assert.assertTrue(false); + } + } + + + System.out.println(te); + + } + + + + + + + + + + + + + + + private TupleExpr getTupleExpr(String query) throws MalformedQueryException { + + SPARQLParser sp = new SPARQLParser(); + ParsedQuery pq = sp.parseQuery(query, null); + + return pq.getTupleExpr(); + } + + + + + + + private class EntityCentricVisitor extends QueryModelVisitorBase<RuntimeException> { + + private Set<QueryModelNode> ccNodes = Sets.newHashSet(); + + public Set<QueryModelNode> getCcNodes() { + return ccNodes; + } + + + public void meetNode(QueryModelNode node) { + + if(node instanceof EntityTupleSet) { + ccNodes.add(node); + } + + super.meetNode(node); + } + + + + + } + + + + + + + + + +}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/entity/StarQueryTest.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/entity/StarQueryTest.java b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/entity/StarQueryTest.java new file mode 100644 index 0000000..7d0082c --- /dev/null +++ b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/entity/StarQueryTest.java @@ -0,0 +1,270 @@ +package mvm.rya.indexing.accumulo.entity; + +import static org.junit.Assert.*; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +import mvm.rya.accumulo.documentIndex.TextColumn; +import mvm.rya.api.domain.RyaType; +import mvm.rya.api.resolver.RdfToRyaConversions; +import mvm.rya.api.resolver.RyaContext; +import mvm.rya.api.resolver.RyaTypeResolverException; +import mvm.rya.indexing.accumulo.entity.StarQuery; + +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.query.MalformedQueryException; +import org.openrdf.query.algebra.StatementPattern; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.Var; +import org.openrdf.query.algebra.evaluation.QueryBindingSet; +import org.openrdf.query.algebra.helpers.StatementPatternCollector; +import org.openrdf.query.parser.ParsedQuery; +import org.openrdf.query.parser.sparql.SPARQLParser; + +import com.google.common.collect.Sets; +import com.google.common.primitives.Bytes; + +public class StarQueryTest { + + ValueFactory vf = new ValueFactoryImpl(); + + + @Test + public void testBasicFunctionality() { + + String q1 = "" // + + "SELECT ?X ?Y1 ?Y2 " // + + "{"// + + "GRAPH <http://joe> { " // + + "?X <uri:cf1> ?Y1 ."// + + "?X <uri:cf2> ?Y2 ."// + + "?X 
<uri:cf3> ?Y3 ."// + + "}" // + + "}"; + + + SPARQLParser parser = new SPARQLParser(); + + ParsedQuery pq1 = null; + try { + pq1 = parser.parseQuery(q1, null); + } catch (MalformedQueryException e) { + e.printStackTrace(); + } + + TupleExpr te1 = pq1.getTupleExpr(); + + System.out.println(te1); + List<StatementPattern> spList1 = StatementPatternCollector.process(te1); + + Assert.assertTrue(StarQuery.isValidStarQuery(spList1)); + + + StarQuery sq1 = new StarQuery(spList1); + + Var v = sq1.getCommonVar(); + + Assert.assertEquals("X", v.getName()); + Assert.assertEquals(null, v.getValue()); + Assert.assertEquals(v.getValue(), sq1.getCommonVarValue()); + Assert.assertTrue(!sq1.commonVarHasValue()); + Assert.assertEquals("X", sq1.getCommonVarName()); + Assert.assertTrue(sq1.isCommonVarURI()); + + Assert.assertTrue(sq1.hasContext()); + Assert.assertEquals("http://joe", sq1.getContextURI()); + + TextColumn[] cond = sq1.getColumnCond(); + + for(int i = 0; i < cond.length; i++ ) { + + Assert.assertEquals(cond[i].getColumnFamily().toString(), "uri:cf" + (i+1)); + Assert.assertEquals(cond[i].getColumnQualifier().toString(), "object"); + + } + + Set<String> unCommonVars = Sets.newHashSet(); + unCommonVars.add("Y1"); + unCommonVars.add("Y2"); + unCommonVars.add("Y3"); + Assert.assertEquals(unCommonVars, sq1.getUnCommonVars()); + + Map<String, Integer> varPos = sq1.getVarPos(); + + Assert.assertEquals(0, varPos.get("Y1").intValue()); + Assert.assertEquals(1, varPos.get("Y2").intValue()); + Assert.assertEquals(2, varPos.get("Y3").intValue()); + + QueryBindingSet bs1 = new QueryBindingSet(); + QueryBindingSet bs2 = new QueryBindingSet(); + + Value v1 = vf.createURI("uri:hank"); + Value v2 = vf.createURI("uri:bob"); + + bs1.addBinding("X",v1); + bs2.addBinding("X", v1); + bs2.addBinding("Y3", v2); + + Set<String> s1 = StarQuery.getCommonVars(sq1, bs1); + Set<String> s2 = StarQuery.getCommonVars(sq1, bs2); + + Set<String> s3 = Sets.newHashSet(); + Set<String> s4 = Sets.newHashSet(); 
+ s3.add("X"); + s4.add("X"); + s4.add("Y3"); + + + Assert.assertEquals(s1, s3); + Assert.assertEquals(s2, s4); + + + + } + + + + + + + + + + @Test + public void testGetContrainedQuery() { + + String q1 = "" // + + "SELECT ?X ?Y1 ?Y2 " // + + "{"// + + "GRAPH <http://joe> { " // + + "?X <uri:cf1> ?Y1 ."// + + "?X <uri:cf2> ?Y2 ."// + + "?X <uri:cf3> ?Y3 ."// + + "}" // + + "}"; + + + SPARQLParser parser = new SPARQLParser(); + + ParsedQuery pq1 = null; + try { + pq1 = parser.parseQuery(q1, null); + } catch (MalformedQueryException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + TupleExpr te1 = pq1.getTupleExpr(); + + System.out.println(te1); + List<StatementPattern> spList1 = StatementPatternCollector.process(te1); + + StarQuery sq1 = new StarQuery(spList1); + + QueryBindingSet bs1 = new QueryBindingSet(); + QueryBindingSet bs2 = new QueryBindingSet(); + + Value v1 = vf.createURI("uri:hank"); + Value v2 = vf.createURI("uri:bob"); + + bs1.addBinding("X",v1); + bs2.addBinding("X", v1); + bs2.addBinding("Y3", v2); + + StarQuery sq2 = StarQuery.getConstrainedStarQuery(sq1, bs1); + StarQuery sq3 = StarQuery.getConstrainedStarQuery(sq1, bs2); + + Assert.assertTrue(sq2.commonVarHasValue()); + Assert.assertEquals(sq2.getCommonVarValue(), "uri:hank"); + + Assert.assertTrue(sq3.commonVarHasValue()); + Assert.assertEquals(sq3.getCommonVarValue(), "uri:hank"); + + + TextColumn[] tc1 = sq1.getColumnCond(); + TextColumn[] tc2 = sq2.getColumnCond(); + TextColumn[] tc3 = sq3.getColumnCond(); + + for(int i = 0; i < tc1.length; i++) { + + Assert.assertTrue(tc1[i].equals(tc2[i])); + if(i != 2) { + Assert.assertTrue(tc1[i].equals(tc3[i])); + } else { + Assert.assertEquals(tc3[i].getColumnFamily(), new Text("uri:cf3")); + RyaType objType = RdfToRyaConversions.convertValue(v2); + byte[][] b1 = null; + try { + b1 = RyaContext.getInstance().serializeType(objType); + } catch (RyaTypeResolverException e) { + e.printStackTrace(); + } + byte[] b2 = 
Bytes.concat("object".getBytes(), + "\u0000".getBytes(), b1[0], b1[1]); + Assert.assertEquals(tc3[i].getColumnQualifier(), new Text(b2)); + Assert.assertTrue(!tc3[i].isPrefix()); + } + } + + + + } + + + + + @Test + public void testConstantPriority() { + + String q1 = "" // + + "SELECT ?X " // + + "{"// + + "GRAPH <http://joe> { " // + + "?X <uri:cf1> <uri:obj1> ."// + + "?X <uri:cf2> <uri:obj1> ."// + + "?X <uri:cf3> <uri:obj1> ."// + + "}" // + + "}"; + + + SPARQLParser parser = new SPARQLParser(); + + ParsedQuery pq1 = null; + try { + pq1 = parser.parseQuery(q1, null); + } catch (MalformedQueryException e) { + e.printStackTrace(); + } + + TupleExpr te1 = pq1.getTupleExpr(); + + System.out.println(te1); + List<StatementPattern> spList1 = StatementPatternCollector.process(te1); + + Assert.assertTrue(StarQuery.isValidStarQuery(spList1)); + + + StarQuery sq1 = new StarQuery(spList1); + Var v = sq1.getCommonVar(); + + Assert.assertEquals("uri:obj1",v.getValue().stringValue()); + + + + } + + + + + + + + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/freetext/AccumuloFreeTextIndexerTest.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/freetext/AccumuloFreeTextIndexerTest.java b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/freetext/AccumuloFreeTextIndexerTest.java new file mode 100644 index 0000000..aeeb174 --- /dev/null +++ b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/freetext/AccumuloFreeTextIndexerTest.java @@ -0,0 +1,220 @@ +package mvm.rya.indexing.accumulo.freetext; + +/* + * #%L + * mvm.rya.indexing.accumulo + * %% + * Copyright (C) 2014 Rya + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import info.aduna.iteration.CloseableIteration; + +import java.util.HashSet; +import java.util.Map.Entry; +import java.util.Set; + +import junit.framework.Assert; +import mvm.rya.api.domain.RyaStatement; +import mvm.rya.api.domain.RyaType; +import mvm.rya.api.domain.RyaURI; +import mvm.rya.api.resolver.RdfToRyaConversions; +import mvm.rya.api.resolver.RyaToRdfConversions; +import mvm.rya.indexing.StatementContraints; +import mvm.rya.indexing.accumulo.ConfigUtils; + +import org.apache.accumulo.core.Constants; +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.accumulo.core.data.Key; +import org.apache.hadoop.conf.Configuration; +import org.junit.Before; +import org.junit.Test; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.URIImpl; +import org.openrdf.model.impl.ValueFactoryImpl; +import org.openrdf.model.vocabulary.RDFS; + +import com.google.common.collect.Sets; + +public class AccumuloFreeTextIndexerTest { + private static final StatementContraints EMPTY_CONSTRAINTS = new StatementContraints(); + + Configuration conf; + + @Before + public void before() throws Exception { + String 
tableName = "triplestore_freetext"; + String termTableName = "triplestore_freetext_term"; + conf = new Configuration(); + conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, true); + conf.set(ConfigUtils.CLOUDBASE_USER, "USERNAME"); + conf.set(ConfigUtils.CLOUDBASE_PASSWORD, "PASS"); + conf.set(ConfigUtils.FREE_TEXT_DOC_TABLENAME, tableName); + conf.set(ConfigUtils.FREE_TEXT_TERM_TABLENAME, termTableName); + conf.set(ConfigUtils.CLOUDBASE_AUTHS, "U"); + conf.setClass(ConfigUtils.TOKENIZER_CLASS, SimpleTokenizer.class, Tokenizer.class); + + createTable(conf, tableName); + createTable(conf, termTableName); + } + + /** + * Creates an empty table named {@code tablename} through the connector obtained for + * {@code conf}, deleting any existing table with that name first. + */ + private static void createTable(Configuration conf, String tablename) throws AccumuloException, AccumuloSecurityException, + TableNotFoundException, TableExistsException { + TableOperations tableOps = ConfigUtils.getConnector(conf).tableOperations(); + if (tableOps.exists(tablename)) { + tableOps.delete(tablename); + } + tableOps.create(tablename); + } + + /** + * Stores one statement whose literal object is "this is a new hat" and checks that + * single-term, wildcard and conjunctive queries over its words return it, while + * queries containing a word that is absent, or negating a word that is present, + * return an empty set. + */ + @Test + public void testSearch() throws Exception { + + AccumuloFreeTextIndexer f = new AccumuloFreeTextIndexer(); + f.setConf(conf); + + ValueFactory vf = new ValueFactoryImpl(); + + URI subject = new URIImpl("foo:subj"); + URI predicate = RDFS.LABEL; + Value object = vf.createLiteral("this is a new hat"); + + URI context = new URIImpl("foo:context"); + + Statement statement = vf.createStatement(subject, predicate, object, context); + f.storeStatement(RdfToRyaConversions.convertStatement(statement)); + f.flush(); + + printTables(conf); + + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("asdf", EMPTY_CONSTRAINTS))); + + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("this & !is", EMPTY_CONSTRAINTS))); + + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("this", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("is", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("a", 
EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("new", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("hat", EMPTY_CONSTRAINTS))); + + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("ha*", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("*at", EMPTY_CONSTRAINTS))); + + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("hat & new", EMPTY_CONSTRAINTS))); + + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("this & hat & new", EMPTY_CONSTRAINTS))); + + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("bat", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("this & bat", EMPTY_CONSTRAINTS))); + + f.close(); + } + + /** + * Sets FREETEXT_PREDICATES_LIST to "pred:1,pred:2", stores five statements and checks + * that a query for "valid" returns exactly the two literal-valued statements that use + * those predicates, while queries for "invalid" and "in:valid" return nothing. + */ + @Test + public void testRestrictPredicatesSearch() throws Exception { + conf.setStrings(ConfigUtils.FREETEXT_PREDICATES_LIST, "pred:1,pred:2"); + + AccumuloFreeTextIndexer f = new AccumuloFreeTextIndexer(); + f.setConf(conf); + + // These should not be stored because they are not in the predicate list + f.storeStatement(new RyaStatement(new RyaURI("foo:subj1"), new RyaURI(RDFS.LABEL.toString()), new RyaType("invalid"))); + f.storeStatement(new RyaStatement(new RyaURI("foo:subj2"), new RyaURI(RDFS.COMMENT.toString()), new RyaType("invalid"))); + + RyaURI pred1 = new RyaURI("pred:1"); + RyaURI pred2 = new RyaURI("pred:2"); + + // These should be stored because they are in the predicate list + RyaStatement s3 = new RyaStatement(new RyaURI("foo:subj3"), pred1, new RyaType("valid")); + RyaStatement s4 = new RyaStatement(new RyaURI("foo:subj4"), pred2, new RyaType("valid")); + f.storeStatement(s3); + f.storeStatement(s4); + + // This should not be stored because the object is not a literal + f.storeStatement(new RyaStatement(new RyaURI("foo:subj5"), pred1, new RyaURI("in:valid"))); + + f.flush(); + + printTables(conf); + + 
Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("invalid", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(), getSet(f.queryText("in:valid", EMPTY_CONSTRAINTS))); + + Set<Statement> actual = getSet(f.queryText("valid", EMPTY_CONSTRAINTS)); + Assert.assertEquals(2, actual.size()); + Assert.assertTrue(actual.contains(RyaToRdfConversions.convertStatement(s3))); + Assert.assertTrue(actual.contains(RyaToRdfConversions.convertStatement(s4))); + + f.close(); + } + + /** + * Stores one statement in context foo:context and checks that a query for "hat" + * returns it with no context constraint and with the matching context, but returns + * nothing when the constraint names the context foo:context2. + */ + @Test + public void testContextSearch() throws Exception { + + AccumuloFreeTextIndexer f = new AccumuloFreeTextIndexer(); + f.setConf(conf); + + ValueFactory vf = new ValueFactoryImpl(); + URI subject = new URIImpl("foo:subj"); + URI predicate = new URIImpl(RDFS.COMMENT.toString()); + Value object = vf.createLiteral("this is a new hat"); + URI context = new URIImpl("foo:context"); + + Statement statement = vf.createStatement(subject, predicate, object, context); + f.storeStatement(RdfToRyaConversions.convertStatement(statement)); + f.flush(); + + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("hat", EMPTY_CONSTRAINTS))); + Assert.assertEquals(Sets.newHashSet(statement), getSet(f.queryText("hat", new StatementContraints().setContext(context)))); + Assert.assertEquals(Sets.newHashSet(), + getSet(f.queryText("hat", new StatementContraints().setContext(vf.createURI("foo:context2"))))); + + f.close(); + } + + /** + * Prints the row, column family, column qualifier and value of every entry in every + * table listed by the connector obtained for {@code conf}, scanning with + * Constants.NO_AUTHS. Output goes to standard out. + */ + public static void printTables(Configuration conf) throws AccumuloException, AccumuloSecurityException, TableNotFoundException { + TableOperations tops = ConfigUtils.getConnector(conf).tableOperations(); + + // print tables + String FORMAT = "%-20s %-20s %-40s %-40s\n"; + for (String table : tops.list()) { + System.out.println("Reading : " + table); + System.out.format(FORMAT, "--Row--", "--ColumnFamily--", "--ColumnQualifier--", "--Value--"); + Scanner s = ConfigUtils.getConnector(conf).createScanner(table, Constants.NO_AUTHS); + for (Entry<Key, 
org.apache.accumulo.core.data.Value> entry : s) { + Key k = entry.getKey(); + System.out.format(FORMAT, k.getRow(), k.getColumnFamily(), k.getColumnQualifier(), entry.getValue()); + } + System.out.println(); + } + + } + + /** + * Drains an iteration into a set and closes the iteration afterwards. + * + * @param iter the iteration to consume; it is closed even if reading from it fails + * @return a set holding every element the iteration produced + * @throws Exception if reading from or closing the iteration fails + */ + private static <X> Set<X> getSet(CloseableIteration<X, ?> iter) throws Exception { + Set<X> set = new HashSet<X>(); + try { + while (iter.hasNext()) { + set.add(iter.next()); + } + } finally { + /* Release whatever the iteration holds open, whether or not the read succeeded. */ + iter.close(); + } + return set; + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/freetext/query/QueryParserTest.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/freetext/query/QueryParserTest.java b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/freetext/query/QueryParserTest.java new file mode 100644 index 0000000..3152ac3 --- /dev/null +++ b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/freetext/query/QueryParserTest.java @@ -0,0 +1,129 @@ +package mvm.rya.indexing.accumulo.freetext.query; + +/* + * #%L + * mvm.rya.indexing.accumulo + * %% + * Copyright (C) 2014 Rya + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +import java.util.ArrayList; +import java.util.List; + +import mvm.rya.indexing.accumulo.freetext.query.ASTExpression; +import mvm.rya.indexing.accumulo.freetext.query.ASTTerm; +import mvm.rya.indexing.accumulo.freetext.query.Node; +import mvm.rya.indexing.accumulo.freetext.query.ParseException; +import mvm.rya.indexing.accumulo.freetext.query.QueryParser; +import mvm.rya.indexing.accumulo.freetext.query.TokenMgrError; + +import org.apache.commons.lang.StringUtils; +import org.junit.Assert; +import org.junit.Test; + +/** + * Checks the trees produced by {@link QueryParser#parse(String)} by comparing a bracketed rendering of + * each parsed tree with a hand-written expectation. + */ +public class QueryParserTest { + + /** + * Parses an assortment of queries; every row of the table is { query, expected rendering } and the + * rows are checked in order. + */ + @Test + public void AssortmentTest() throws Exception { + final String[][] cases = { + { "a* or b", "([WILDTERM]a* OR [TERM]b)" }, + { "a and b", "([TERM]a AND [TERM]b)" }, + { "a b", "([TERM]a AND [TERM]b)" }, + { "a b c", "([TERM]a AND [TERM]b AND [TERM]c)" }, + { "(a and b)", "([TERM]a AND [TERM]b)" }, + { "(a and b) and c", "(([TERM]a AND [TERM]b) AND [TERM]c)" }, + { "alpha and beta or charlie and delta or (boo and par)", "(([TERM]alpha AND [TERM]beta) OR ([TERM]charlie AND [TERM]delta) OR ([TERM]boo AND [TERM]par))" }, + { "a and (b or c)", "([TERM]a AND ([TERM]b OR [TERM]c))" }, + { "not a and (b or c)", "(![TERM]a AND ([TERM]b OR [TERM]c))" }, + { "not a and not (b or c)", "(![TERM]a AND !([TERM]b OR [TERM]c))" }, + { "not a and not (b or \"c and d\")", "(![TERM]a AND !([TERM]b OR [QUOTED]\"c and d\"))" }, + { "((a and b) and c)", "(([TERM]a AND [TERM]b) AND [TERM]c)" }, + { "not(a and b)", "!([TERM]a AND [TERM]b)" }, + { "not(not(a and b))", "([TERM]a AND [TERM]b)" }, + { "(not(!a and b))", "!(![TERM]a AND [TERM]b)" }, + { "not(!a and b)", "!(![TERM]a AND [TERM]b)" }, + { "not a", "![TERM]a" }, + { "not(not a)", "[TERM]a" }, + { "(not(!A or B))", "!(![TERM]A OR [TERM]B)" }, + { "not \"!A\"", "![QUOTED]\"!A\"" }, + };
 + for (String[] testCase : cases) { + runTest(testCase[0], testCase[1]); + } + } + + /** + * Parses {@code query} and asserts that its rendering equals {@code expected}. + */ + private static void runTest(String query, String expected) throws ParseException, TokenMgrError { + String actual = prettyPrint(QueryParser.parse(query)); + Assert.assertEquals(expected, actual); + } + + /** + * Renders a parse tree as text. An {@link ASTTerm} becomes {@code [type]term}; an + * {@link ASTExpression} becomes its children joined by its type and wrapped in parentheses; any other + * node becomes its children joined by a single space. A set not-flag adds a leading {@code !}. + */ + public static String prettyPrint(Node s) { + if (s instanceof ASTTerm) { + ASTTerm term = (ASTTerm) s; + String negation = term.isNotFlag() ? "!" : ""; + return negation + "[" + term.getType() + "]" + term.getTerm(); + } + + String open = ""; + String close = ""; + String separator = " "; + if (s instanceof ASTExpression) { + ASTExpression expression = (ASTExpression) s; + open = expression.isNotFlag() ? "!(" : "("; + close = ")"; + separator = " " + expression.getType() + " "; + } + + List<String> rendered = new ArrayList<String>(); + for (int child = 0; child < s.jjtGetNumChildren(); child++) { + rendered.add(prettyPrint(s.jjtGetChild(child))); + } + String body = StringUtils.join(rendered, separator); + return open + body + close; + + } +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/92ddfa59/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/geo/GeoIndexerSfTest.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/geo/GeoIndexerSfTest.java b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/geo/GeoIndexerSfTest.java new file mode 100644 index 0000000..5149abe --- /dev/null +++ b/extras/indexing/src/test/java/mvm/rya/indexing/accumulo/geo/GeoIndexerSfTest.java @@ -0,0 +1,315 @@ +package mvm.rya.indexing.accumulo.geo; + +/* + * #%L + * mvm.rya.indexing.accumulo + * %% + * Copyright (C) 2014 Rya + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import info.aduna.iteration.CloseableIteration; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.UUID; + +import mvm.rya.accumulo.AccumuloRdfConfiguration; +import mvm.rya.api.domain.RyaStatement; +import mvm.rya.api.resolver.RdfToRyaConversions; +import mvm.rya.api.resolver.RyaToRdfConversions; +import mvm.rya.indexing.GeoIndexer; +import mvm.rya.indexing.StatementContraints; +import mvm.rya.indexing.accumulo.ConfigUtils; +import mvm.rya.indexing.accumulo.geo.GeoConstants; +import mvm.rya.indexing.accumulo.geo.GeoMesaGeoIndexer; + +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.openrdf.model.Resource; +import org.openrdf.model.Statement; +import org.openrdf.model.URI; +import org.openrdf.model.Value; +import org.openrdf.model.ValueFactory; +import org.openrdf.model.impl.StatementImpl; +import org.openrdf.model.impl.ValueFactoryImpl; + +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.vividsolutions.jts.geom.Coordinate; +import com.vividsolutions.jts.geom.Geometry; +import com.vividsolutions.jts.geom.GeometryFactory; +import com.vividsolutions.jts.geom.LineString; +import com.vividsolutions.jts.geom.LinearRing; +import com.vividsolutions.jts.geom.Point; +import com.vividsolutions.jts.geom.Polygon; +import com.vividsolutions.jts.geom.PrecisionModel; +import
com.vividsolutions.jts.geom.impl.PackedCoordinateSequence; + +/** + * Tests all of the "simple functions" of the geoindexer. + * <p> + * Every test queries the six fixture geometries A-F that {@link #before()} stores in a newly created + * {@link GeoMesaGeoIndexer}. + */ +public class GeoIndexerSfTest { + private static Configuration conf; + private static final GeometryFactory gf = new GeometryFactory(new PrecisionModel(), 4326); + private static GeoMesaGeoIndexer g; + + private static final StatementContraints EMPTY_CONSTRAINTS = new StatementContraints(); + + // Here is the landscape: + /** + * <pre> + * +---+---+---+---+---+---+---+ + * | F | | + * + A + + C + + * | | | + * +---+---+ E +---+---+ + * | | / | + * + B + /+---+---+ + * | | / | | + * +---+---+/--+---+---+ + * / | D | + * / +---+---+ + * </pre> + **/ + + private static final Polygon A = poly(bbox(0, 1, 4, 5)); + private static final Polygon B = poly(bbox(0, 1, 2, 3)); + private static final Polygon C = poly(bbox(4, 3, 6, 5)); + private static final Polygon D = poly(bbox(3, 0, 5, 2)); + + private static final Point F = point(2, 4); + + private static final LineString E = line(2, 0, 3, 3); + + // Maps each fixture geometry to the name used in its subject URI ("uri:" + name). + private static final Map<Geometry, String> names = Maps.newHashMap(); + static { + names.put(A, "A"); + names.put(B, "B"); + names.put(C, "C"); + names.put(D, "D"); + names.put(E, "E"); + names.put(F, "F"); + } + + /** + * Rebuilds the fixture before each test: configures a mock instance, deletes every table whose name + * starts with the geo table name, and stores the six fixture geometries in a new indexer. + */ + @Before + public void before() throws Exception { + System.out.println(UUID.randomUUID().toString()); + String tableName = "triplestore_geospacial"; + conf = new Configuration(); + conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, true); + conf.set(ConfigUtils.CLOUDBASE_USER, "USERNAME"); + conf.set(ConfigUtils.CLOUDBASE_PASSWORD, "PASS"); + conf.set(ConfigUtils.GEO_TABLENAME, tableName); + conf.set(ConfigUtils.CLOUDBASE_AUTHS, "U"); + + TableOperations tops = ConfigUtils.getConnector(conf).tableOperations(); + // get all of the table names with the prefix + Set<String> toDel = Sets.newHashSet(); + for (String t : tops.list()) { + if (t.startsWith(tableName)) { + toDel.add(t); + } + } + for (String t : toDel) { + tops.delete(t); + } + + g = new
GeoMesaGeoIndexer(); + g.setConf(conf); + g.storeStatement(statement(A)); + g.storeStatement(statement(B)); + g.storeStatement(statement(C)); + g.storeStatement(statement(D)); + g.storeStatement(statement(F)); + g.storeStatement(statement(E)); + } + + /** + * Builds the statement for a fixture geometry: subject {@code uri:NAME} (NAME looked up in + * {@link #names}), predicate {@code GeoConstants.GEO_AS_WKT}, and an object literal holding + * {@code geo.toString()} typed as {@code GeoConstants.XMLSCHEMA_OGC_WKT}. + */ + private static RyaStatement statement(Geometry geo) { + ValueFactory vf = new ValueFactoryImpl(); + Resource subject = vf.createURI("uri:" + names.get(geo)); + URI predicate = GeoConstants.GEO_AS_WKT; + Value object = vf.createLiteral(geo.toString(), GeoConstants.XMLSCHEMA_OGC_WKT); + return RdfToRyaConversions.convertStatement(new StatementImpl(subject, predicate, object)); + + } + + /** Creates the point (x, y) with the shared geometry factory. */ + private static Point point(double x, double y) { + return gf.createPoint(new Coordinate(x, y)); + } + + /** Creates the two-point line from (x1, y1) to (x2, y2). */ + private static LineString line(double x1, double y1, double x2, double y2) { + return new LineString(new PackedCoordinateSequence.Double(new double[] { x1, y1, x2, y2 }, 2), gf); + } + + /** + * Creates a polygon without holes whose shell is the ring given by {@code arr}, a flat array of + * x,y pairs such as the one returned by {@link #bbox(double, double, double, double)}. + */ + private static Polygon poly(double[] arr) { + LinearRing shell = gf.createLinearRing(new PackedCoordinateSequence.Double(arr, 2)); + return gf.createPolygon(shell, new LinearRing[] {}); + } + + /** + * Returns the corners of the box spanned by (x1, y1) and (x2, y2) as a flat x,y array in the order + * (x1,y1), (x1,y2), (x2,y2), (x2,y1), (x1,y1); the first corner is repeated to close the ring. + */ + private static double[] bbox(double x1, double y1, double x2, double y2) { + return new double[] { x1, y1, x1, y2, x2, y2, x2, y1, x1, y1 }; + } + + /** + * Asserts that {@code actual} yields exactly the statements of the {@code expected} fixture + * geometries, ignoring order and duplicates. The iteration is consumed and closed. + */ + public void compare(CloseableIteration<Statement, ?> actual, Geometry...
expected) throws Exception { + Set<Statement> expectedSet = Sets.newHashSet(); + for (Geometry geo : expected) { + expectedSet.add(RyaToRdfConversions.convertStatement(statement(geo))); + } + + Assert.assertEquals(expectedSet, getSet(actual)); + } + + /** + * Drains {@code iter} into a set and closes it before returning. + * + * @param iter the iteration to consume + * @return the distinct elements produced by {@code iter} + * @throws Exception if reading from or closing the iteration fails + */ + private static <X> Set<X> getSet(CloseableIteration<X, ?> iter) throws Exception { + Set<X> set = new HashSet<X>(); + try { + while (iter.hasNext()) { + set.add(iter.next()); + } + } finally { + // Close the iteration even when reading from it fails. + iter.close(); + } + return set; + } + + private static final Geometry[] EMPTY_RESULTS = {}; + + @Test + public void testEquals() throws Exception { + // point + compare(g.queryEquals(F, EMPTY_CONSTRAINTS), F); + compare(g.queryEquals(point(2, 2), EMPTY_CONSTRAINTS), EMPTY_RESULTS); + + // line + compare(g.queryEquals(E, EMPTY_CONSTRAINTS), E); + compare(g.queryEquals(line(2, 2, 3, 3), EMPTY_CONSTRAINTS), EMPTY_RESULTS); + + // poly + compare(g.queryEquals(A, EMPTY_CONSTRAINTS), A); + compare(g.queryEquals(poly(bbox(1, 1, 4, 5)), EMPTY_CONSTRAINTS), EMPTY_RESULTS); + + } + + @Test + public void testDisjoint() throws Exception { + // point + compare(g.queryDisjoint(F, EMPTY_CONSTRAINTS), B, C, D, E); + + // line + compare(g.queryDisjoint(E, EMPTY_CONSTRAINTS), B, C, D, F); + + // poly + compare(g.queryDisjoint(A, EMPTY_CONSTRAINTS), EMPTY_RESULTS); + compare(g.queryDisjoint(B, EMPTY_CONSTRAINTS), C, D, F, E); + } + + @Test + public void testIntersectsPoint() throws Exception { + // NOTE(review): no active assertion - this test currently passes vacuously. + // This seems like a bug + // compare(g.queryIntersects(F, EMPTY_CONSTRAINTS), A, F); + // compare(g.queryIntersects(F, EMPTY_CONSTRAINTS), EMPTY_RESULTS); + } + + @Test + public void testIntersectsLine() throws Exception { + // NOTE(review): no active assertion - this test currently passes vacuously. + // This seems like a bug + // compare(g.queryIntersects(E, EMPTY_CONSTRAINTS), A, E); + // compare(g.queryIntersects(E, EMPTY_CONSTRAINTS), EMPTY_RESULTS); + } + + @Test + public void testIntersectsPoly() throws Exception { + compare(g.queryIntersects(A, EMPTY_CONSTRAINTS), A, B, C, D, F, E); + } + + @Test + public void testTouchesPoint() throws Exception { +
compare(g.queryTouches(F, EMPTY_CONSTRAINTS), EMPTY_RESULTS); + } + + @Test + public void testTouchesLine() throws Exception { + compare(g.queryTouches(E, EMPTY_CONSTRAINTS), EMPTY_RESULTS); + } + + @Test + public void testTouchesPoly() throws Exception { + compare(g.queryTouches(A, EMPTY_CONSTRAINTS), C); + } + + @Test + public void testCrossesPoint() throws Exception { + compare(g.queryCrosses(F, EMPTY_CONSTRAINTS), EMPTY_RESULTS); + } + + @Test + public void testCrossesLine() throws Exception { + // NOTE(review): no active assertion - this test currently passes vacuously. + // compare(g.queryCrosses(E, EMPTY_CONSTRAINTS), A); + } + + @Test + public void testCrossesPoly() throws Exception { + compare(g.queryCrosses(A, EMPTY_CONSTRAINTS), E); + } + + @Test + public void testWithin() throws Exception { + // point + // compare(g.queryWithin(F, EMPTY_CONSTRAINTS), F); + + // line + // compare(g.queryWithin(E, EMPTY_CONSTRAINTS), E); + + // poly + compare(g.queryWithin(A, EMPTY_CONSTRAINTS), A, B, F); + } + + @Test + public void testContainsPoint() throws Exception { + compare(g.queryContains(F, EMPTY_CONSTRAINTS), A, F); + } + + @Test + public void testContainsLine() throws Exception { + // NOTE(review): no active assertion - this test currently passes vacuously. + // compare(g.queryContains(E, EMPTY_CONSTRAINTS), E); + } + + @Test + public void testContainsPoly() throws Exception { + compare(g.queryContains(A, EMPTY_CONSTRAINTS), A); + compare(g.queryContains(B, EMPTY_CONSTRAINTS), A, B); + } + + @Test + public void testOverlapsPoint() throws Exception { + // NOTE(review): no active assertion - this test currently passes vacuously. + // compare(g.queryOverlaps(F, EMPTY_CONSTRAINTS), F); + // You cannot have overlapping points + // compare(g.queryOverlaps(F, EMPTY_CONSTRAINTS), EMPTY_RESULTS); + } + + @Test + public void testOverlapsLine() throws Exception { + // NOTE(review): no active assertion - this test currently passes vacuously. + // compare(g.queryOverlaps(E, EMPTY_CONSTRAINTS), A, E); + // You cannot have overlapping lines + // compare(g.queryOverlaps(E, EMPTY_CONSTRAINTS), EMPTY_RESULTS); + } + + @Test + public void testOverlapsPoly() throws Exception { + compare(g.queryOverlaps(A, EMPTY_CONSTRAINTS), D); + } + +}
