RYA-234-Metadata
Project: http://git-wip-us.apache.org/repos/asf/incubator-rya/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-rya/commit/11349b11 Tree: http://git-wip-us.apache.org/repos/asf/incubator-rya/tree/11349b11 Diff: http://git-wip-us.apache.org/repos/asf/incubator-rya/diff/11349b11 Branch: refs/heads/master Commit: 11349b115e7a47b430a1cb2593902cd1d72d7d6b Parents: 45efa55 Author: Caleb Meier <[email protected]> Authored: Tue Dec 13 12:26:29 2016 -0800 Committer: pujav65 <[email protected]> Committed: Mon Mar 13 10:00:26 2017 -0400 ---------------------------------------------------------------------- .../api/RdfCloudTripleStoreConfiguration.java | 38 +- .../rya/api/domain/StatementMetadata.java | 95 ++- .../rya/api/persist/query/RyaQueryEngine.java | 3 +- .../rya/api/domain/StatementMetadataTest.java | 3 +- .../accumulo/query/AccumuloRyaQueryEngine.java | 4 + .../rya/mongodb/MongoConnectorFactory.java | 5 + .../apache/rya/mongodb/MongoDBQueryEngine.java | 2 +- .../ValidIndexCombinationGenerator.java | 12 +- .../rya/indexing/accumulo/ConfigUtils.java | 6 +- .../matching/AbstractExternalSetMatcher.java | 148 +++++ .../AbstractExternalSetMatcherFactory.java | 46 ++ .../matching/AbstractExternalSetOptimizer.java | 145 +++++ .../external/matching/AbstractQuerySegment.java | 122 ++++ .../indexing/external/matching/BasicRater.java | 127 ++++ .../external/matching/ExternalSetConverter.java | 33 + .../external/matching/ExternalSetMatcher.java | 133 ++++ .../external/matching/ExternalSetProvider.java | 50 ++ .../external/matching/FlattenedOptional.java | 321 ++++++++++ .../indexing/external/matching/JoinSegment.java | 176 +++++ .../external/matching/JoinSegmentMatcher.java | 121 ++++ .../external/matching/MatcherUtilities.java | 45 ++ .../external/matching/OptionalJoinSegment.java | 168 +++++ .../matching/OptionalJoinSegmentMatcher.java | 164 +++++ .../matching/QueryNodeConsolidator.java | 618 ++++++++++++++++++ .../external/matching/QueryNodeListRater.java | 39 ++ .../matching/QueryNodesToTupleExpr.java | 188 ++++++ .../external/matching/QuerySegment.java | 85 +++ .../external/matching/QuerySegmentFactory.java | 60 ++ .../matching/TopOfQueryFilterRelocator.java | 95 +++ .../pcj/matching/AbstractPCJMatcher.java | 126 ---- .../pcj/matching/AbstractQuerySegment.java | 122 ---- .../pcj/matching/AccumuloIndexSetProvider.java | 223 +++++++ .../pcj/matching/FlattenedOptional.java | 331 ---------- .../rya/indexing/pcj/matching/JoinSegment.java | 130 ---- .../pcj/matching/JoinSegmentPCJMatcher.java | 106 --- .../pcj/matching/OptionalJoinSegment.java | 146 ----- .../matching/OptionalJoinSegmentPCJMatcher.java | 142 ----- .../matching/PCJExternalSetMatcherFactory.java | 45 ++ .../rya/indexing/pcj/matching/PCJMatcher.java | 76 --- .../pcj/matching/PCJMatcherFactory.java | 73 --- .../pcj/matching/PCJNodeConsolidator.java | 628 ------------------ .../rya/indexing/pcj/matching/PCJOptimizer.java | 339 ++-------- .../pcj/matching/PCJOptimizerUtilities.java | 5 +- .../pcj/matching/PCJToSegmentConverter.java | 117 ++++ .../pcj/matching/QueryNodesToTupleExpr.java | 194 ------ .../rya/indexing/pcj/matching/QuerySegment.java | 83 --- .../pcj/matching/TopOfQueryFilterRelocator.java | 97 --- .../MetadataNodeToSegmentConverter.java | 44 ++ .../matching/RyaQueryEngineFactory.java | 75 +++ ...tementMetadataExternalSetMatcherFactory.java | 50 ++ .../StatementMetadataExternalSetProvider.java | 113 ++++ .../matching/StatementMetadataNode.java | 638 +++++++++++++++++++ .../matching/StatementMetadataOptimizer.java | 92 +++ .../ValidIndexCombinationGeneratorTest.java | 507 --------------- .../IndexPlanValidatorTest.java | 24 +- .../external/AccumuloPcjIntegrationTest.java | 12 +- .../external/PrecompJoinOptimizerTest.java | 2 +- .../mongo/MongoFreeTextIndexerTest.java | 9 +- .../pcj/matching/FlattenedOptionalTest.java | 1 + .../pcj/matching/JoinSegmentPCJMatcherTest.java | 74 +-- .../indexing/pcj/matching/JoinSegmentTest.java | 29 +- .../OptionalJoinSegmentPCJMatcherTest.java | 41 +- .../pcj/matching/OptionalJoinSegmentTest.java | 20 +- .../pcj/matching/PCJNodeConsolidatorTest.java | 71 ++- .../indexing/pcj/matching/PCJOptimizerTest.java | 12 +- .../AccumuloStatementMetadataNodeTest.java | 379 +++++++++++ .../AccumuloStatementMetadataOptimizerIT.java | 284 +++++++++ .../metadata/MongoStatementMetadataIT.java | 275 ++++++++ .../MongoStatementMetadataNodeTest.java | 399 ++++++++++++ ...tatementMetadataExternalSetProviderTest.java | 178 ++++++ .../StatementMetadataOptimizerTest.java | 162 +++++ .../metadata/StatementMetadataTestUtils.java | 100 +++ .../benchmark/query/PCJOptimizerBenchmark.java | 6 +- 73 files changed, 6383 insertions(+), 3249 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/common/rya.api/src/main/java/org/apache/rya/api/RdfCloudTripleStoreConfiguration.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/RdfCloudTripleStoreConfiguration.java b/common/rya.api/src/main/java/org/apache/rya/api/RdfCloudTripleStoreConfiguration.java index f1aa3cf..ae2e03a 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/RdfCloudTripleStoreConfiguration.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/RdfCloudTripleStoreConfiguration.java @@ -1,5 +1,7 @@ package org.apache.rya.api; +import java.util.HashSet; + /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -22,7 +24,9 @@ package org.apache.rya.api; import java.util.List; +import java.util.Set; +import org.apache.rya.api.domain.RyaURI; import org.apache.rya.api.layout.TableLayoutStrategy; import org.apache.rya.api.layout.TablePrefixLayoutStrategy; import org.apache.rya.api.persist.RdfEvalStatsDAO; @@ -66,6 +70,8 @@ public abstract class RdfCloudTripleStoreConfiguration extends Configuration { public static final String CONF_OPTIMIZERS = "query.optimizers"; public static final String CONF_PCJ_OPTIMIZER = "pcj.query.optimizer"; public static final String CONF_PCJ_TABLES = "pcj.index.tables"; + public static final String CONF_STATEMENT_METADATA_PROPERTIES = "statement.metadata.properites"; + public static final String CONF_USE_STATEMENT_METADATA = "use.statement.metadata"; /** @@ -449,6 +455,37 @@ public abstract class RdfCloudTripleStoreConfiguration extends Configuration { } return pcjTables; } + + public void setUseStatementMetadata(boolean useMetadata) { + setBoolean(CONF_USE_STATEMENT_METADATA, useMetadata); + } + + public boolean getUseStatementMetadata() { + return getBoolean(CONF_USE_STATEMENT_METADATA, false); + } + + public void setStatementMetadataProperties(Set<RyaURI> metadataProperties) { + + String[] propArray = new String[metadataProperties.size()]; + int i = 0; + for(RyaURI uri: metadataProperties) { + propArray[i] = uri.getData(); + i++; + } + setStrings(CONF_STATEMENT_METADATA_PROPERTIES, propArray); + } + + + public Set<RyaURI> getStatementMetadataProperties() { + Set<RyaURI> uriSet = new HashSet<>(); + String[] uriStrings = getStrings(CONF_STATEMENT_METADATA_PROPERTIES); + if (uriStrings != null) { + for (String s : uriStrings) { + uriSet.add(new RyaURI(s)); + } + } + return uriSet; + } public void setPcjOptimizer(Class<? extends QueryOptimizer> optimizer) { @@ -464,7 +501,6 @@ public abstract class RdfCloudTripleStoreConfiguration extends Configuration { } else { return null; } - } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/common/rya.api/src/main/java/org/apache/rya/api/domain/StatementMetadata.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/domain/StatementMetadata.java b/common/rya.api/src/main/java/org/apache/rya/api/domain/StatementMetadata.java index a70e5da..83612b7 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/domain/StatementMetadata.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/domain/StatementMetadata.java @@ -19,36 +19,47 @@ package org.apache.rya.api.domain; import java.io.UnsupportedEncodingException; +import java.lang.reflect.Type; import java.util.HashMap; import java.util.Map; import org.apache.rya.api.persist.RdfDAOException; +import org.openrdf.model.impl.URIImpl; +import com.google.common.base.Preconditions; +import com.google.common.reflect.TypeToken; import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonPrimitive; +import com.google.gson.JsonSerializationContext; +import com.google.gson.JsonSerializer; public class StatementMetadata { - - private static Gson gson = new Gson(); + + private static Gson gson = new GsonBuilder().enableComplexMapKeySerialization() + .registerTypeHierarchyAdapter(RyaType.class, new RyaTypeAdapter()).create();; public static StatementMetadata EMPTY_METADATA = new StatementMetadata(); - - private Map<String, String> metadataMap = new HashMap<String, String>(); - - public StatementMetadata() { - - } - - public void addMetadata(String key, String value){ - metadataMap.put(key, value); - } + + private Map<RyaURI, RyaType> metadataMap = new HashMap<>(); + @SuppressWarnings("serial") + private Type type = new TypeToken<Map<RyaURI, RyaType>>(){}.getType(); + + public StatementMetadata() {} public StatementMetadata(byte[] value) throws RdfDAOException { try { if (value == null) { metadataMap = new HashMap<>(); } else { - // try to convert back to a json string and then back to the map. + // try to convert back to a json string and then back to the + // map. String metadataString = new String(value, "UTF8"); - metadataMap = gson.fromJson(metadataString, HashMap.class); + metadataMap = gson.fromJson(metadataString, type); if (metadataMap == null) { metadataMap = new HashMap<>(); } @@ -60,14 +71,22 @@ public class StatementMetadata { public StatementMetadata(String statementMetadata) { try { - metadataMap = gson.fromJson(statementMetadata, HashMap.class); + metadataMap = gson.fromJson(statementMetadata, type); } catch (Exception e) { throw new RdfDAOException(e); } } - - public String toString(){ - return gson.toJson(metadataMap); + + public void addMetadata(RyaURI key, RyaType value) { + metadataMap.put(key, value); + } + + public Map<RyaURI, RyaType> getMetadata() { + return metadataMap; + } + + public String toString() { + return gson.toJson(metadataMap, type); } public byte[] toBytes() { @@ -75,9 +94,45 @@ public class StatementMetadata { if (metadataMap.isEmpty()) { return null; } - String metadataString = gson.toJson(metadataMap); // TODO may want to cache this for performance reasons - return metadataString.getBytes(); + try { + return toString().getBytes("UTF8"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + + public static class RyaTypeAdapter implements JsonSerializer<RyaType>, JsonDeserializer<RyaType> { + @Override + public JsonElement serialize(RyaType src, Type typeOfSrc, JsonSerializationContext context) { + JsonObject result = new JsonObject(); + result.add("type", new JsonPrimitive(src.getClass().getName())); + + StringBuilder builder = new StringBuilder(); + builder.append(src.getData()).append("\u0000").append(src.getDataType().toString()); + result.add("properties", new JsonPrimitive(builder.toString())); + + return result; + } + + @Override + public RyaType deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) + throws JsonParseException { + JsonObject jsonObject = json.getAsJsonObject(); + String type = jsonObject.get("type").getAsString(); + JsonElement element = jsonObject.get("properties"); + String[] array = element.getAsJsonPrimitive().getAsString().split("\u0000"); + Preconditions.checkArgument(array.length == 2); + + if(type.equals(RyaURI.class.getName())){ + return new RyaURI(array[0]); + } else if(type.equals(RyaType.class.getName())){ + RyaType ryaType = new RyaType(new URIImpl(array[1]), array[0]); + return ryaType; + } else { + throw new IllegalArgumentException("Unparseable RyaType."); + } + } } } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/common/rya.api/src/main/java/org/apache/rya/api/persist/query/RyaQueryEngine.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/main/java/org/apache/rya/api/persist/query/RyaQueryEngine.java b/common/rya.api/src/main/java/org/apache/rya/api/persist/query/RyaQueryEngine.java index 7e5cbe4..5404804 100644 --- a/common/rya.api/src/main/java/org/apache/rya/api/persist/query/RyaQueryEngine.java +++ b/common/rya.api/src/main/java/org/apache/rya/api/persist/query/RyaQueryEngine.java @@ -23,6 +23,7 @@ package org.apache.rya.api.persist.query; import info.aduna.iteration.CloseableIteration; +import java.io.Closeable; import java.util.Collection; import java.util.Map; @@ -40,7 +41,7 @@ import org.openrdf.query.BindingSet; * Date: 7/17/12 * Time: 8:25 AM */ -public interface RyaQueryEngine<C extends RdfCloudTripleStoreConfiguration> extends RyaConfigured<C> { +public interface RyaQueryEngine<C extends RdfCloudTripleStoreConfiguration> extends RyaConfigured<C>, Closeable { /** * Query the Rya store using the RyaStatement. The Configuration object provides information such as auths, ttl, etc http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/common/rya.api/src/test/java/org/apache/rya/api/domain/StatementMetadataTest.java ---------------------------------------------------------------------- diff --git a/common/rya.api/src/test/java/org/apache/rya/api/domain/StatementMetadataTest.java b/common/rya.api/src/test/java/org/apache/rya/api/domain/StatementMetadataTest.java index 2252dc2..d61a802 100644 --- a/common/rya.api/src/test/java/org/apache/rya/api/domain/StatementMetadataTest.java +++ b/common/rya.api/src/test/java/org/apache/rya/api/domain/StatementMetadataTest.java @@ -20,6 +20,7 @@ package org.apache.rya.api.domain; import org.junit.Assert; import org.junit.Test; +import org.openrdf.model.impl.URIImpl; public class StatementMetadataTest { @@ -31,7 +32,7 @@ public class StatementMetadataTest { StatementMetadata single = new StatementMetadata(); - single.addMetadata("This", "That"); + single.addMetadata(new RyaURI("http://uri"), new RyaType("http://type")); byte[] singleData = single.toBytes(); Assert.assertArrayEquals(singleData, new StatementMetadata(singleData).toBytes()); http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/query/AccumuloRyaQueryEngine.java ---------------------------------------------------------------------- diff --git a/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/query/AccumuloRyaQueryEngine.java b/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/query/AccumuloRyaQueryEngine.java index d7f9dfa..7626f29 100644 --- a/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/query/AccumuloRyaQueryEngine.java +++ b/dao/accumulo.rya/src/main/java/org/apache/rya/accumulo/query/AccumuloRyaQueryEngine.java @@ -407,4 +407,8 @@ public class AccumuloRyaQueryEngine implements RyaQueryEngine<AccumuloRdfConfigu public AccumuloRdfConfiguration getConf() { return configuration; } + + @Override + public void close() throws IOException { + } } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoConnectorFactory.java ---------------------------------------------------------------------- diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoConnectorFactory.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoConnectorFactory.java index 3d70219..1d07e70 100644 --- a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoConnectorFactory.java +++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoConnectorFactory.java @@ -24,18 +24,23 @@ import java.util.Arrays; import org.apache.commons.configuration.ConfigurationRuntimeException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.http.annotation.ThreadSafe; import com.mongodb.MongoClient; import com.mongodb.MongoCredential; import com.mongodb.MongoException; import com.mongodb.ServerAddress; +import edu.umd.cs.findbugs.annotations.DefaultAnnotation; +import edu.umd.cs.findbugs.annotations.NonNull; /** * Mongo convention generally allows for a single instance of a {@link MongoClient} * throughout the life cycle of an application. This MongoConnectorFactory lazy * loads a Mongo Client and uses the same one whenever {@link MongoConnectorFactory#getMongoClient(Configuration)} * is invoked. */ +@ThreadSafe +@DefaultAnnotation(NonNull.class) public class MongoConnectorFactory { private static MongoClient mongoClient; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoDBQueryEngine.java ---------------------------------------------------------------------- diff --git a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoDBQueryEngine.java b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoDBQueryEngine.java index cf9bf78..a57825a 100644 --- a/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoDBQueryEngine.java +++ b/dao/mongodb.rya/src/main/java/org/apache/rya/mongodb/MongoDBQueryEngine.java @@ -57,7 +57,7 @@ import com.google.common.collect.Multimap; * Date: 7/17/12 * Time: 9:28 AM */ -public class MongoDBQueryEngine implements RyaQueryEngine<MongoDBRdfConfiguration>, Closeable { +public class MongoDBQueryEngine implements RyaQueryEngine<MongoDBRdfConfiguration> { private MongoDBRdfConfiguration configuration; private final MongoClient mongoClient; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/ValidIndexCombinationGenerator.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/ValidIndexCombinationGenerator.java b/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/ValidIndexCombinationGenerator.java index 574190e..3aac2f7 100644 --- a/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/ValidIndexCombinationGenerator.java +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/IndexPlanValidator/ValidIndexCombinationGenerator.java @@ -26,7 +26,6 @@ import java.util.NoSuchElementException; import java.util.Set; import org.apache.rya.indexing.external.tupleSet.ExternalTupleSet; - import org.openrdf.query.algebra.Filter; import org.openrdf.query.algebra.QueryModelNode; import org.openrdf.query.algebra.StatementPattern; @@ -39,16 +38,19 @@ import com.google.common.collect.Sets; public class ValidIndexCombinationGenerator { - private TupleExpr query; private Set<String> invalidCombos = Sets.newTreeSet(); private Set<QueryModelNode> spFilterSet; public ValidIndexCombinationGenerator(TupleExpr query) { - this.query = query; SpFilterCollector sfc = new SpFilterCollector(); query.visit(sfc); spFilterSet = sfc.getSpFilterSet(); } + + public ValidIndexCombinationGenerator(List<QueryModelNode> qNodes) { + spFilterSet = Sets.newHashSet(qNodes); + } + public Iterator<List<ExternalTupleSet>> getValidIndexCombos( List<ExternalTupleSet> indexSet) { @@ -434,10 +436,6 @@ public class ValidIndexCombinationGenerator { private Set<QueryModelNode> spFilterSet = Sets.newHashSet(); - public int getNodeNumber() { - return spFilterSet.size(); - } - public Set<QueryModelNode> getSpFilterSet() { return spFilterSet; } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/ConfigUtils.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/ConfigUtils.java b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/ConfigUtils.java index 61a1003..88e9667 100644 --- a/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/ConfigUtils.java +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/accumulo/ConfigUtils.java @@ -62,6 +62,7 @@ import org.apache.rya.indexing.accumulo.temporal.AccumuloTemporalIndexer; import org.apache.rya.indexing.external.PrecomputedJoinIndexer; import org.apache.rya.indexing.mongodb.freetext.MongoFreeTextIndexer; import org.apache.rya.indexing.pcj.matching.PCJOptimizer; +import org.apache.rya.indexing.statement.metadata.matching.StatementMetadataOptimizer; /** * A set of configuration utils to read a Hadoop {@link Configuration} object and create Cloudbase/Accumulo objects. @@ -405,7 +406,10 @@ public class ConfigUtils { if (getUseEntity(conf)) { indexList.add(EntityCentricIndex.class.getName()); optimizers.add(EntityOptimizer.class.getName()); - + } + + if(conf.getUseStatementMetadata()) { + optimizers.add(StatementMetadataOptimizer.class.getName()); } conf.setStrings(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS, indexList.toArray(new String[]{})); http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetMatcher.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetMatcher.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetMatcher.java new file mode 100644 index 0000000..65c1c9d --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetMatcher.java @@ -0,0 +1,148 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.ArrayList; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.rya.indexing.external.matching.QueryNodesToTupleExpr.TupleExprAndNodes; +import org.openrdf.query.algebra.BinaryTupleOperator; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.UnaryTupleOperator; +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +/** + * This class provides implementations of methods common to all implementations + * of the {@link ExternalSetMatcher} interface. + * + */ +public abstract class AbstractExternalSetMatcher<T extends ExternalSet> implements ExternalSetMatcher<T> { + + protected QuerySegment<T> segment; + protected List<QueryModelNode> segmentNodeList; + protected TupleExpr tuple; + protected Set<TupleExpr> unmatched; + protected Set<Filter> filters; + private QuerySegmentFactory<T> factory = new QuerySegmentFactory<T>(); + + /** + * Matches {@link QuerySegment} with underlying QuerySegment. If match + * occurs, corresponding nodes are replaced by ExternalSet node. After each + * call of this method, call updateTuplesAndNodes to update related + * information. + * + * @param nodes + * - QuerySegment representation of ExternalSet to be used for + * matching + * @param set + * - {@link ExternalSet} used to replace matching ExternalSet + * nodes when match occurs + * @return - true is match and replace occurs and false otherwise + */ + protected abstract boolean match(QuerySegment<T> nodes, T set); + + /** + * In following method, order is determined by the order in which the node + * appear in the query. + * + * @return - an ordered view of the QueryModelNodes appearing tuple + * + */ + @Override + public List<QueryModelNode> getOrderedNodes() { + return Collections.unmodifiableList(segmentNodeList); + } + + @Override + public Set<Filter> getFilters() { + return filters; + } + + @Override + public TupleExpr getQuery() { + return tuple; + } + + @Override + public Set<TupleExpr> getUnmatchedArgNodes() { + return unmatched; + } + + @Override + public QuerySegment<T> nodeToQuerySegment(QueryModelNode node) { + return factory.getQuerySegment(node); + } + + @Override + public List<QueryModelNode> getAllUnmatchedNodes() { + List<QueryModelNode> unmatched = new ArrayList<>(); + for (QueryModelNode node : segmentNodeList) { + if (!(node instanceof ExternalSet)) { + unmatched.add(node); + } + } + return unmatched; + } + + protected void updateTupleAndNodes() { + segmentNodeList = segment.getOrderedNodes(); + final TupleExprAndNodes tupAndNodes = segment.getQuery(); + tuple = tupAndNodes.getTupleExpr(); + filters = tupAndNodes.getFilters(); + unmatched = new HashSet<>(); + final List<QueryModelNode> nodes = tupAndNodes.getNodes(); + for (final QueryModelNode q : nodes) { + if (q instanceof UnaryTupleOperator || q instanceof BinaryTupleOperator) { + unmatched.add((TupleExpr) q); + } else if (q instanceof FlattenedOptional) { + FlattenedOptional opt = (FlattenedOptional) q; + TupleExpr rightArg = opt.getRightArg(); + if (rightArg instanceof UnaryTupleOperator || rightArg instanceof BinaryTupleOperator) { + unmatched.add(rightArg); + } + } + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetMatcherFactory.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetMatcherFactory.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetMatcherFactory.java new file mode 100644 index 0000000..d4cd2e5 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetMatcherFactory.java @@ -0,0 +1,46 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +/** + * This class takes in a given {@link Join}, {@Filter}, or {@link LeftJoin} + * and provides the appropriate {@link ExternalSetMatcher} to match Entities to the + * given query. + */ +public abstract class AbstractExternalSetMatcherFactory<T extends ExternalSet> { + + public ExternalSetMatcher<T> getMatcher(final QuerySegment<T> segment) { + if(segment instanceof JoinSegment<?>) { + return getJoinSegmentMatcher((JoinSegment<T>) segment); + } else if(segment instanceof OptionalJoinSegment<?>) { + return getOptionalJoinSegmentMatcher((OptionalJoinSegment<T>)segment); + } else { + throw new IllegalArgumentException("Invalid Segment."); + } + } + + protected abstract ExternalSetMatcher<T> getJoinSegmentMatcher(JoinSegment<T> segment); + + protected abstract ExternalSetMatcher<T> getOptionalJoinSegmentMatcher(OptionalJoinSegment<T> segment); + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetOptimizer.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetOptimizer.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetOptimizer.java new file mode 100644 index 0000000..9ba40fd --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractExternalSetOptimizer.java @@ -0,0 +1,145 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.rya.indexing.external.matching.QueryNodesToTupleExpr.TupleExprAndNodes; +import org.apache.rya.indexing.pcj.matching.PCJOptimizerUtilities; +import org.openrdf.query.BindingSet; +import org.openrdf.query.Dataset; +import org.openrdf.query.algebra.BinaryTupleOperator; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.UnaryTupleOperator; +import org.openrdf.query.algebra.evaluation.QueryOptimizer; +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +import com.google.common.base.Optional; + +/** + * Abstract base class meant to be extended by any QueryOptimizer that matches ExternalSets + * to QueryModelNodes within the parsed query plan. + * + * @param <T> - ExternalSet parameter + */ +public abstract class AbstractExternalSetOptimizer<T extends ExternalSet> implements QueryOptimizer { + + protected boolean useOptimal = false; + + @Override + public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) { + QuerySegmentMatchVisitor visitor = new QuerySegmentMatchVisitor(); + tupleExpr.visit(visitor); + } + + /** + * This visitor navigates query until it reaches either a Join, Filter, or + * LeftJoin. Once it reaches this node, it creates the appropriate + * ExternalSetMatcher and uses this to match each of the {@link ExternalSet} + * s to the {@link QuerySegment} starting with the Join, Filter, or + * LeftJoin. Once each ExternalSet has been compared for matching, the + * portion of the query starting with the Join, Filter, or LeftJoin is + * replaced by the {@link TupleExpr} returned by + * {@link ExternalSetMatcher#getQuery()}. This visitor then visits each of + * the nodes returned by {@link ExternalSetMatcher#getUnmatchedArgs()}. + * + */ + protected class QuerySegmentMatchVisitor extends QueryModelVisitorBase<RuntimeException> { + + private final QuerySegmentFactory<T> factory = new QuerySegmentFactory<T>(); + + @Override + public void meetNode(QueryModelNode node) { + + if (checkNode(node)) { + QuerySegment<T> segment = factory.getQuerySegment(node); + ExternalSetProvider<T> provider = getProvider(); + ExternalSetMatcher<T> matcher = getMatcher(segment); + QuerySegment<T> tempSeg = null; + if(useOptimal) { + tempSeg = matcher.match(provider.getExternalSetCombos(segment), getNodeListRater(segment)); + } else { + tempSeg = matcher.match(provider.getExternalSets(segment)); + } + + TupleExprAndNodes tups = tempSeg.getQuery(); + node.replaceWith(tups.getTupleExpr()); + Set<TupleExpr> unmatched = getUnMatchedArgNodes(tups.getNodes()); + PCJOptimizerUtilities.relocateFilters(tups.getFilters()); + + for (final TupleExpr tupleExpr : unmatched) { + tupleExpr.visit(this); + } + } else { + super.meetNode(node); + } + } + } + + private Set<TupleExpr> getUnMatchedArgNodes(List<QueryModelNode> nodes) { + Set<TupleExpr> unmatched = new HashSet<>(); + for (final QueryModelNode q : nodes) { + if (q instanceof UnaryTupleOperator || q instanceof BinaryTupleOperator) { + unmatched.add((TupleExpr) q); + } else if (q instanceof FlattenedOptional) { + FlattenedOptional opt = (FlattenedOptional) q; + TupleExpr rightArg = opt.getRightArg(); + if (rightArg instanceof UnaryTupleOperator || rightArg instanceof BinaryTupleOperator) { + unmatched.add(rightArg); + } + } + } + return unmatched; + } + + + private static boolean checkNode(QueryModelNode node) { + return (node instanceof Join || node instanceof Filter || node instanceof LeftJoin); + } + + /** + * Get Matcher used to match ExternalSets to query + * + * @param segment + * @return + */ + protected abstract ExternalSetMatcher<T> getMatcher(QuerySegment<T> segment); + + /** + * Get ExternalSetProvider for source of ExternalSets to match to query + * + * @return + */ + protected abstract ExternalSetProvider<T> getProvider(); + + /** + * Get QueryNodeListRater to find optimal QueryNodeList after matching ExternalSets + * + * @return + */ + protected abstract Optional<QueryNodeListRater> getNodeListRater(QuerySegment<T> segment); + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractQuerySegment.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractQuerySegment.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractQuerySegment.java new file mode 100644 index 0000000..49f39f4 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/AbstractQuerySegment.java @@ -0,0 +1,122 @@ +package org.apache.rya.indexing.external.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.rya.indexing.external.matching.QueryNodesToTupleExpr.TupleExprAndNodes; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +/** + * This class provides implementations of methods common to implementations of + * the {@link QuerySegment} interface. + * + */ +public abstract class AbstractQuerySegment<T extends ExternalSet> implements QuerySegment<T>, Cloneable { + + protected List<QueryModelNode> orderedNodes = new ArrayList<>(); + protected Set<QueryModelNode> unorderedNodes = new HashSet<>(); + protected Map<ValueExpr, Filter> conditionMap = Maps.newHashMap(); + + /** + * Returns Set of nodes contained in the segment + */ + @Override + public Set<QueryModelNode> getUnOrderedNodes() { + return Collections.unmodifiableSet(unorderedNodes); + } + + /** + * Returns a List of nodes contained in this segment, where order is + * determined by the getJoinArgs method + * + * @param TupleExpr + * from top to bottom. + */ + @Override + public List<QueryModelNode> getOrderedNodes() { + return Collections.unmodifiableList(orderedNodes); + } + + /** + * Allows nodes to be reordered using {@link ExternalSetMatcher} and set + * + * @param nodes + * - reordering of orderedNodes + */ + @Override + public void setNodes(List<QueryModelNode> nodes) { + Set<QueryModelNode> nodeSet = Sets.newHashSet(nodes); + Preconditions.checkArgument(nodeSet.equals(unorderedNodes)); + orderedNodes = nodes; + unorderedNodes = nodeSet; + } + + /** + * @param query + * - QuerySegment that this method checks for in this JoinSegment + */ + @Override + public boolean containsQuerySegment(QuerySegment<T> query) { + return unorderedNodes.containsAll(query.getUnOrderedNodes()); + } + + /** + * @return - a TupleExpr representing this JoinSegment + */ + @Override + public TupleExprAndNodes getQuery() { + List<QueryModelNode> nodeCopy = new ArrayList<>(); + for (QueryModelNode q : orderedNodes) { + if (!(q instanceof ValueExpr)) { + nodeCopy.add(q.clone()); + } + } + QueryNodesToTupleExpr qnt = new QueryNodesToTupleExpr(nodeCopy, getFilters()); + return qnt.getTupleAndNodes(); + } + + @Override + public Set<Filter> getFilters() { + Collection<Filter> filters = conditionMap.values(); + Set<Filter> filterSet = new HashSet<>(); + for (Filter filter : filters) { + filterSet.add(filter.clone()); + } + + return filterSet; + + } + + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/BasicRater.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/BasicRater.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/BasicRater.java new file mode 100644 index 0000000..a26c872 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/BasicRater.java @@ -0,0 +1,127 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +/** + * This implementation of the QueryNodeListRater assigns a score to a specified + * list between and 0 and 1, where the lower the score, the better the list. It + * is assumed that the specified list in + * {@link BasicRater#rateQuerySegment(List)} is the result of matching + * {@link ExternalSet}s to the original list specified in the constructor. The + * method {@link BasicRater#rateQuerySegment(List)} determines a score based on + * how much smaller the specified list is than the original, and based on how + * many connected components the specified list has. Here the components are among + * the graph obtained by drawing edges between QueryModelNodes with common + * variables. + * + */ +public class BasicRater implements QueryNodeListRater { + + private List<QueryModelNode> qNodes; + + public BasicRater(List<QueryModelNode> qNodes) { + this.qNodes = qNodes; + } + + @Override + public double rateQuerySegment(List<QueryModelNode> eNodes) { + return .6 * ((double) eNodes.size()) / qNodes.size() + .4 * getConnectedComponentRating(eNodes); + } + + private double getConnectedComponentRating(List<QueryModelNode> eNodes) { + + Multimap<String, Integer> commonVarBin = HashMultimap.create(); + + // bin QueryModelNode positions according to variable names + for (int i = 0; i < eNodes.size(); i++) { + QueryModelNode node = eNodes.get(i); + if (node instanceof TupleExpr) { + TupleExpr tup = (TupleExpr) node; + Set<String> bindingNames = tup.getAssuredBindingNames(); + for (String name : bindingNames) { + if (!name.startsWith("-const-")) { + commonVarBin.put(name, i); + } + } + } + } + + Set<List<Integer>> pairs = new HashSet<>(); + for (String var : commonVarBin.keySet()) { + Set<Integer> pos = Sets.newHashSet(commonVarBin.get(var)); + pairs.addAll(Sets.cartesianProduct(pos, pos)); + } + + int numComponents = countComponents(eNodes.size(), pairs); + return ((double) numComponents) / eNodes.size(); + + } + + public int countComponents(int n, Set<List<Integer>> pairs) { + int count = n; + + int[] root = new int[n]; + // initialize each node is an island + for (int i = 0; i < n; i++) { + root[i] = i; + } + + for (List<Integer> pair : pairs) { + + int x = pair.get(0); + int y = pair.get(1); + + // ignore self directed edges + if (x == y) { + continue; + } + + int xRoot = getRoot(root, x); + int yRoot = getRoot(root, y); + + if (xRoot != yRoot) { + count--; + root[xRoot] = yRoot; + } + + } + + return count; + } + + public int getRoot(int[] arr, int i) { + while (arr[i] != i) { + arr[i] = arr[arr[i]]; + i = arr[i]; + } + return i; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetConverter.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetConverter.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetConverter.java new file mode 100644 index 0000000..2e973e9 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetConverter.java @@ -0,0 +1,33 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +public interface ExternalSetConverter<T extends ExternalSet> { + + /** + * Converts the {@link ExternalSet} to a {@link QuerySegment} + * + * @param set - ExternalSet to be converted + * @return QuerySegment derived from ExternalSet + */ + public QuerySegment<T> setToSegment(T set); + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetMatcher.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetMatcher.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetMatcher.java new file mode 100644 index 0000000..5456bed --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetMatcher.java @@ -0,0 +1,133 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Collection; +import java.util.Iterator; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.List; +import java.util.Set; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +import com.google.common.base.Optional; + +/** + * This interface provides a framework for matching {@link ExternalSet}s to + * subsets of a given {@link QuerySegment}. + * + */ +public interface ExternalSetMatcher<T extends ExternalSet> { + + /** + * + *Matches specified ExternalSet to underlying QuerySegment and returns + *true if match occurs and false otherwise. Underlying QuerySegment is changed by + *matching ExternalSet to matching query nodes. + * + * @param set + * - {@link ExternalSet} used to replace matching PCJ nodes when + * match occurs + * @return - true is match and replace occurs and false otherwise + */ + public boolean match(T set); + + /** + *Matches specified ExternalSets to underlying QuerySegment and returns + *the resulting matched QuerySegment. Underlying QuerySegment is unchanged. + * + * @param sets - Collection of ExternalSets to be matched to QuerySegment + */ + public QuerySegment<T> match(Collection<T> sets); + + /** + * Allows Matcher to iterate over different Collections of ExternalSets to find an optimal combination of matching ExternalSets. + * Returns the QuerySegment whose node list has a minimum Rating according to either specified or default QueryNodeListRater. + * + * @param sets - Iterator over Lists of ExternalSets to be matched to QuerySegment. + * @param rater - Class that rates the resulting QuerySegment after each collection is matched for the purpose + * of finding an optimal ExternalSet combination. If the rater is not provided, the {@link BasicRater} is used. + */ + public QuerySegment<T> match(Iterator<List<T>> sets, Optional<QueryNodeListRater> rater); + + /** + * @return - TupleExpr constructed from {@link QuerySegment} with matched + * nodes + */ + public TupleExpr getQuery(); + + /** + * Converts TupleExpr starting with given node to a {@link QuerySegment}. + * Node must be a {@link Filter}, {@link Join}, or {@link LeftJoin}. + * + * @param node + * @return + */ + public QuerySegment<T> nodeToQuerySegment(QueryModelNode node); + + /** + * @return - all {@link TupleExpr} that haven't been matched to a PCJ + */ + public Set<TupleExpr> getUnmatchedArgNodes(); + + /** + * Similar to {@link #getUnmatchedArgs()}, except does not ignore no arg + * statements. + * + * @return a list of all unmatched nodes. + */ + public List<QueryModelNode> getAllUnmatchedNodes(); + + /** + * + * @return - provided ordered view of QuerySegment nodes + */ + public List<QueryModelNode> getOrderedNodes(); + + /** + * + * @return - Set of {@link Filter}s of given QuerySegment + */ + public Set<Filter> getFilters(); + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetProvider.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetProvider.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetProvider.java new file mode 100644 index 0000000..801bb87 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/ExternalSetProvider.java @@ -0,0 +1,50 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Iterator; +import java.util.List; + +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +/** + * Interface for extracting {@link ExternalSet}s from specified {@link QuerySegment}s. + * + * @param <T> - Extension of ExternalSet class + */ +public interface ExternalSetProvider<T extends ExternalSet> { + + /** + * Extract all {@link ExternalSet}s from specified QuerySegment. + * + * @param segment + * @return - List of ExternalSets + */ + public List<T> getExternalSets(QuerySegment<T> segment); + + /** + * Extract an Iterator over Lists of ExternalSets. This allows an ExtenalSetProvider to pass back + * different combinations of ExternalSets for the purposes of query optimization. + * + * @param segment + * @return - Iterator over different combinations of ExternalSets + */ + public Iterator<List<T>> getExternalSetCombos(QuerySegment<T> segment); + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/FlattenedOptional.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/FlattenedOptional.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/FlattenedOptional.java new file mode 100644 index 0000000..a0cbdb8 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/FlattenedOptional.java @@ -0,0 +1,321 @@ +package org.apache.rya.indexing.external.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.rya.rdftriplestore.inference.DoNotExpandSP; +import org.apache.rya.rdftriplestore.utils.FixedStatementPattern; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.QueryModelNodeBase; +import org.openrdf.query.algebra.QueryModelVisitor; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.Var; + +import com.google.common.collect.Sets; + +/** + * This class is essentially a wrapper for {@link LeftJoin}. It provides a + * flattened view of a LeftJoin that is useful for matching {@AccumuloIndexSet } + * nodes to sub-queries to use for Precomputed Joins. Because LeftJoins cannot + * automatically be interchanged with {@link Join}s and other LeftJoins in the + * query plan, this class has utility methods to check when nodes can be + * interchanged in the query plan. These methods track which variables returned + * by {@link LeftJoin#getRightArg()} are bound. A variable is bound if it also + * contained in the set returned by {@link LeftJoin#getLeftArg()}. Nodes can be + * interchanged with a LeftJoin (and hence a FlattenedOptional) so long as the + * bound and unbound variables do not change. + * + */ +public class FlattenedOptional extends QueryModelNodeBase implements TupleExpr { + + private Set<TupleExpr> rightArgs; + private Set<String> boundVars; + private Set<String> unboundVars; + private Map<String, Integer> leftArgVarCounts = new HashMap<String, Integer>(); + private ValueExpr condition; + private TupleExpr rightArg; + private Set<String> bindingNames; + private Set<String> assuredBindingNames; + + public FlattenedOptional(LeftJoin node) { + rightArgs = getJoinArgs(node.getRightArg(), new HashSet<TupleExpr>()); + boundVars = setWithOutConstants( + Sets.intersection(node.getLeftArg().getAssuredBindingNames(), node.getRightArg().getBindingNames())); + unboundVars = setWithOutConstants(Sets.difference(node.getRightArg().getBindingNames(), boundVars)); + condition = node.getCondition(); + rightArg = node.getRightArg(); + getVarCounts(node); + assuredBindingNames = new HashSet<>(leftArgVarCounts.keySet()); + bindingNames = new HashSet<>(Sets.union(assuredBindingNames, unboundVars)); + } + + public FlattenedOptional(FlattenedOptional optional) { + this.rightArgs = optional.rightArgs; + this.boundVars = optional.boundVars; + this.unboundVars = optional.unboundVars; + this.condition = optional.condition; + this.rightArg = optional.rightArg; + this.leftArgVarCounts = optional.leftArgVarCounts; + this.bindingNames = optional.bindingNames; + this.assuredBindingNames = optional.assuredBindingNames; + } + + public Set<TupleExpr> getRightArgs() { + return rightArgs; + } + + public TupleExpr getRightArg() { + return rightArg; + } + + /** + * + * @param te + * - TupleExpr to be added to leftarg of {@link LeftJoin} + */ + public void addArg(TupleExpr te) { + if (te instanceof FlattenedOptional) { + return; + } + incrementVarCounts(te.getBindingNames()); + } + + public void removeArg(TupleExpr te) { + if (te instanceof FlattenedOptional) { + return; + } + decrementVarCounts(te.getBindingNames()); + } + + /** + * + * @param te + * - {@link TupleExpr} to be added to leftArg of LeftJoin + * @return - true if adding TupleExpr does not affect unbound variables and + * returns false otherwise + */ + public boolean canAddTuple(TupleExpr te) { + // can only add LeftJoin if rightArg varNames do not intersect + // unbound vars + if (te instanceof FlattenedOptional) { + FlattenedOptional lj = (FlattenedOptional) te; + if (Sets.intersection(lj.rightArg.getBindingNames(), unboundVars).size() > 0) { + return false; + } else { + return true; + } + } + + return Sets.intersection(te.getBindingNames(), unboundVars).size() == 0; + } + + /** + * + * @param te + * - {@link TupleExpr} to be removed from leftArg of LeftJoin + * @return - true if removing TupleExpr does not affect bound variables and + * returns false otherwise + */ + public boolean canRemoveTuple(TupleExpr te) { + return canRemove(te); + } + + @Override + public Set<String> getBindingNames() { + return bindingNames; + } + + @Override + public Set<String> getAssuredBindingNames() { + return assuredBindingNames; + } + + public ValueExpr getCondition() { + return condition; + } + + @Override + public boolean equals(Object other) { + if (other instanceof FlattenedOptional) { + FlattenedOptional ljDec = (FlattenedOptional) other; + ValueExpr oCond = ljDec.getCondition(); + return nullEquals(condition, oCond) && ljDec.getRightArgs().equals(rightArgs); + } + return false; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = prime + (rightArgs == null ? 0 : rightArgs.hashCode()); + result = prime * result + (condition == null ? 0 : condition.hashCode()); + return result; + } + + /** + * This method is used to retrieve a set view of all descendants of the + * rightArg of the LeftJoin (the optional part) + * + * @param tupleExpr + * - tupleExpr whose args are being retrieved + * @param joinArgs + * - set view of all non-join args that are descendants of + * tupleExpr + * @return joinArgs + */ + private Set<TupleExpr> getJoinArgs(TupleExpr tupleExpr, Set<TupleExpr> joinArgs) { + if (tupleExpr instanceof Join) { + if (!(((Join) tupleExpr).getLeftArg() instanceof FixedStatementPattern) + && !(((Join) tupleExpr).getRightArg() instanceof DoNotExpandSP)) { + Join join = (Join) tupleExpr; + getJoinArgs(join.getLeftArg(), joinArgs); + getJoinArgs(join.getRightArg(), joinArgs); + } + } else if (tupleExpr instanceof LeftJoin) { // TODO probably not + // necessary if not + // including leftarg + LeftJoin lj = (LeftJoin) tupleExpr; + joinArgs.add(new FlattenedOptional(lj)); + getJoinArgs(lj.getLeftArg(), joinArgs); + } else if (tupleExpr instanceof Filter) { + getJoinArgs(((Filter) tupleExpr).getArg(), joinArgs); + } else { + joinArgs.add(tupleExpr); + } + + return joinArgs; + } + + /** + * This method counts the number of times each variable appears in the + * leftArg of the LeftJoin defining this FlattenedOptional. This information + * is used to whether nodes can be moved out of the leftarg above the + * LeftJoin in the query. + * + * @param tupleExpr + */ + private void getVarCounts(TupleExpr tupleExpr) { + if (tupleExpr instanceof Join) { + Join join = (Join) tupleExpr; + getVarCounts(join.getLeftArg()); + getVarCounts(join.getRightArg()); + } else if (tupleExpr instanceof LeftJoin) { + LeftJoin lj = (LeftJoin) tupleExpr; + getVarCounts(lj.getLeftArg()); + } else if (tupleExpr instanceof Filter) { + getVarCounts(((Filter) tupleExpr).getArg()); + } else { + incrementVarCounts(tupleExpr.getBindingNames()); + } + } + + /** + * + * @param te + * - {@link TupleExpr} to be removed from leftArg of LeftJoin + * @return - true if removing te doesn't affect bounded variables of + * LeftJoin and false otherwise + */ + private boolean canRemove(TupleExpr te) { + // can only remove LeftJoin if right varNames do not intersect + // unbound vars + if (te instanceof FlattenedOptional) { + FlattenedOptional lj = (FlattenedOptional) te; + if (Sets.intersection(lj.getRightArg().getBindingNames(), unboundVars).size() > 0) { + return false; + } else { + return true; + } + } + Set<String> vars = te.getBindingNames(); + Set<String> intersection = Sets.intersection(vars, boundVars); + if (intersection.size() == 0) { + return true; + } + for (String s : intersection) { + if (leftArgVarCounts.containsKey(s) && leftArgVarCounts.get(s) == 1) { + return false; + } + } + return true; + } + + private void incrementVarCounts(Set<String> vars) { + for (String s : vars) { + if (!s.startsWith("-const-") && leftArgVarCounts.containsKey(s)) { + leftArgVarCounts.put(s, leftArgVarCounts.get(s) + 1); + } else if (!s.startsWith("-const-")) { + leftArgVarCounts.put(s, 1); + } + } + } + + private void decrementVarCounts(Set<String> vars) { + for (String s : vars) { + if (leftArgVarCounts.containsKey(s) && leftArgVarCounts.get(s) > 1) { + leftArgVarCounts.put(s, leftArgVarCounts.get(s) - 1); + } else { + leftArgVarCounts.remove(s); + bindingNames.remove(s); + assuredBindingNames.remove(s); + } + } + } + + /** + * + * @param vars + * - set of {@link Var} names, possibly contained constants + */ + private Set<String> setWithOutConstants(Set<String> vars) { + Set<String> copy = new HashSet<>(); + for (String s : vars) { + if (!s.startsWith("-const-")) { + copy.add(s); + } + } + + return copy; + } + + @Override + public <X extends Exception> void visit(QueryModelVisitor<X> visitor) throws X { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "FlattenedOptional: " + rightArgs; + } + + @Override + public FlattenedOptional clone() { + return new FlattenedOptional(this); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/JoinSegment.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/JoinSegment.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/JoinSegment.java new file mode 100644 index 0000000..d69d8d9 --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/JoinSegment.java @@ -0,0 +1,176 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.ArrayList; +import java.util.HashMap; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.rya.indexing.pcj.matching.PCJOptimizerUtilities; +import org.apache.rya.rdftriplestore.inference.DoNotExpandSP; +import org.apache.rya.rdftriplestore.utils.FixedStatementPattern; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; + +/** + * This class represents a portion of a {@link TupleExpr} query that PCJ queries + * are compared to. A JoinSegment is comprised of a collection of + * {@link QueryModelNode}s that are connected by {@link Join}s. In the case, the + * QueryModelNodes can commute within the JoinSegment, which makes JoinSegments + * a natural way to partition a query for PCJ matching. A query is decomposed + * into JoinSegments and PCJ queries can easily be compared to the + * {@link QueryModelNode}s contained in the segment using set operations. + * + */ +public class JoinSegment<T extends ExternalSet> extends AbstractQuerySegment<T> { + + public JoinSegment(Join join) { + Preconditions.checkNotNull(join); + Preconditions.checkArgument(!MatcherUtilities.segmentContainsLeftJoins(join)); + createJoinSegment(join); + } + + public JoinSegment(Filter filter) { + Preconditions.checkNotNull(filter); + Preconditions.checkArgument(!MatcherUtilities.segmentContainsLeftJoins(filter)); + createJoinSegment(filter); + } + + public JoinSegment(Set<QueryModelNode> unorderedNodes, List<QueryModelNode> orderedNodes, Map<ValueExpr, Filter> filterMap) { + this.unorderedNodes = unorderedNodes; + this.orderedNodes = orderedNodes; + this.conditionMap = filterMap; + } + + private void createJoinSegment(TupleExpr te) { + orderedNodes = getJoinArgs(te, orderedNodes); + unorderedNodes = Sets.newHashSet(orderedNodes); + } + + /** + * This method matches the ordered nodes returned by + * {@link JoinSegment#getOrderedNodes()} for nodeToReplace with a subset of + * the ordered nodes for this JoinSegment. The order of the nodes for + * nodeToReplace must match the order of the nodes as a subset of + * orderedNodes + * + * @param nodeToReplace + * - nodes to be replaced by pcj + * @param pcj + * - pcj node that will replace specified query nodes + */ + @Override + public boolean replaceWithExternalSet(QuerySegment<T> nodeToReplace, T set) { + Preconditions.checkNotNull(nodeToReplace != null); + Preconditions.checkNotNull(set); + if (!containsQuerySegment(nodeToReplace)) { + return false; + } + Set<QueryModelNode> nodeSet = nodeToReplace.getUnOrderedNodes(); + orderedNodes.removeAll(nodeSet); + orderedNodes.add(set); + unorderedNodes.removeAll(nodeSet); + unorderedNodes.add(set); + for (QueryModelNode q : nodeSet) { + if (q instanceof ValueExpr) { + conditionMap.remove(q); + } + } + return true; + } + + /** + * + * @param tupleExpr + * - the query object that will be traversed by this method + * @param joinArgs + * - all nodes connected by Joins and Filters + * @return - List containing all nodes connected by Joins, LeftJoins, and + * Filters. This List contains the + * @param ValueExpr + * in place of the Filter + */ + private List<QueryModelNode> getJoinArgs(TupleExpr tupleExpr, List<QueryModelNode> joinArgs) { + + if (tupleExpr instanceof Join) { + if (!(((Join) tupleExpr).getLeftArg() instanceof FixedStatementPattern) + && !(((Join) tupleExpr).getRightArg() instanceof DoNotExpandSP)) { + Join join = (Join) tupleExpr; + getJoinArgs(join.getRightArg(), joinArgs); + getJoinArgs(join.getLeftArg(), joinArgs); + } + } else if (tupleExpr instanceof Filter) { + Filter filter = (Filter) tupleExpr; + joinArgs.add(filter.getCondition()); + conditionMap.put(filter.getCondition(), filter); + getJoinArgs(filter.getArg(), joinArgs); + } else { + joinArgs.add(tupleExpr); + } + return joinArgs; + } + + + @Override + public JoinSegment<T> clone() { + List<QueryModelNode> order = new ArrayList<>(); + for(QueryModelNode node: orderedNodes) { + order.add(node.clone()); + } + + Set<QueryModelNode> unorder = Sets.newHashSet(order); + + Map<ValueExpr, Filter> map = new HashMap<>(); + for(ValueExpr expr: conditionMap.keySet()) { + map.put(expr.clone(), conditionMap.get(expr).clone()); + } + return new JoinSegment<T>(unorder, order, map); + } + + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/JoinSegmentMatcher.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/JoinSegmentMatcher.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/JoinSegmentMatcher.java new file mode 100644 index 0000000..8f4e43a --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/JoinSegmentMatcher.java @@ -0,0 +1,121 @@ +package org.apache.rya.indexing.external.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.evaluation.impl.ExternalSet; + +import com.google.common.base.Optional; +import com.google.common.base.Preconditions; + +/** + * This class is responsible for matching PCJ nodes with subsets of the + * {@link QueryModelNode}s found in {@link JoinSegment}s. Each PCJ is reduced to + * a bag of QueryModelNodes and set operations can be used to determine if the + * PCJ is a subset of the JoinSegment. If it is a subset, the PCJ node replaces + * the QueryModelNodes in the JoinSegment. + * + */ + +public class JoinSegmentMatcher<T extends ExternalSet> extends AbstractExternalSetMatcher<T> { + + private ExternalSetConverter<T> converter; + + public JoinSegmentMatcher(QuerySegment<T> segment, ExternalSetConverter<T> converter) { + Preconditions.checkNotNull(segment); + Preconditions.checkNotNull(converter); + Preconditions.checkArgument(segment instanceof JoinSegment); + this.segment = segment; + this.segmentNodeList = new ArrayList<>(segment.getOrderedNodes()); + this.converter = converter; + } + + /** + * Match specified {@link QuerySegment} to this QuerySegment and insert + * {@link ExternalSet} node T in place of matched nodes. Here, the specified + * QuerySegment is derived from the ExternalSet node T. + * + * @param segment + * - {@link QuerySegment} representation of node T + * @param T + * - node to be matched + */ + @Override + protected boolean match(QuerySegment<T> nodes, T set) { + Preconditions.checkArgument(nodes instanceof JoinSegment<?>); + final boolean nodesReplaced = segment.replaceWithExternalSet(nodes, set); + if (nodesReplaced) { + updateTupleAndNodes(); + } + + return nodesReplaced; + } + + @Override + public boolean match(T set) { + Preconditions.checkNotNull(set); + QuerySegment<T> segment = converter.setToSegment(set); + return match(segment, set); + } + + @Override + public QuerySegment<T> match(Iterator<List<T>> sets, Optional<QueryNodeListRater> r) { + QueryNodeListRater rater = null; + + if (r.isPresent()) { + rater = r.get(); + } else { + rater = new BasicRater(segmentNodeList); + } + + double min= Double.MAX_VALUE; + double temp = 0; + QuerySegment<T> minSegment = null; + while (sets.hasNext()) { + QuerySegment<T> tempSegment = match(sets.next()); + temp = rater.rateQuerySegment(tempSegment.getOrderedNodes()); + if(temp < min) { + min = temp; + minSegment = tempSegment; + } + } + + return minSegment; + } + + @Override + public QuerySegment<T> match(Collection<T> sets) { + Preconditions.checkNotNull(sets); + QuerySegment<T> copy = ((JoinSegment<T>) segment).clone(); + for (T set : sets) { + QuerySegment<T> nodes = converter.setToSegment(set); + copy.replaceWithExternalSet(nodes, set); + } + + return copy; + } + + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/11349b11/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/MatcherUtilities.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/MatcherUtilities.java b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/MatcherUtilities.java new file mode 100644 index 0000000..b4a48eb --- /dev/null +++ b/extras/indexing/src/main/java/org/apache/rya/indexing/external/matching/MatcherUtilities.java @@ -0,0 +1,45 @@ +package org.apache.rya.indexing.external.matching; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.TupleExpr; + +public class MatcherUtilities { + + public static boolean segmentContainsLeftJoins(final TupleExpr tupleExpr) { + if (tupleExpr instanceof Projection) { + return segmentContainsLeftJoins(((Projection) tupleExpr).getArg()); + } else if (tupleExpr instanceof Join) { + final Join join = (Join) tupleExpr; + return segmentContainsLeftJoins(join.getRightArg()) + || segmentContainsLeftJoins(join.getLeftArg()); + } else if (tupleExpr instanceof LeftJoin) { + return true; + } else if (tupleExpr instanceof Filter) { + return segmentContainsLeftJoins(((Filter) tupleExpr).getArg()); + } else { + return false; + } + } + +}
