http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java index 5396926..2ca8f4a 100644 --- a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java +++ b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/AccumuloIndexSet.java @@ -18,6 +18,8 @@ */ package mvm.rya.indexing.external.tupleSet; +import info.aduna.iteration.CloseableIteration; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -28,51 +30,62 @@ import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; +import mvm.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator; +import mvm.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator.HashJoinType; +import mvm.rya.accumulo.pcj.iterators.IteratorCombiner; +import mvm.rya.accumulo.pcj.iterators.PCJKeyToCrossProductBindingSetIterator; +import mvm.rya.accumulo.pcj.iterators.PCJKeyToJoinBindingSetIterator; +import mvm.rya.api.utils.IteratorWrapper; +import mvm.rya.indexing.pcj.matching.PCJOptimizerUtilities; +import mvm.rya.rdftriplestore.evaluation.ExternalBatchingIterator; + import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.BatchScanner; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.MutationsRejectedException; +import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.security.Authorizations; import org.apache.hadoop.io.Text; import org.apache.rya.indexing.pcj.storage.PcjException; import org.apache.rya.indexing.pcj.storage.PcjMetadata; +import org.apache.rya.indexing.pcj.storage.accumulo.AccumuloPcjSerializer; +import org.apache.rya.indexing.pcj.storage.accumulo.BindingSetConverter.BindingSetConversionException; import org.apache.rya.indexing.pcj.storage.accumulo.PcjTables; import org.apache.rya.indexing.pcj.storage.accumulo.VariableOrder; +import org.openrdf.model.Value; import org.openrdf.query.Binding; import org.openrdf.query.BindingSet; import org.openrdf.query.MalformedQueryException; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.Projection; import org.openrdf.query.algebra.TupleExpr; -import org.openrdf.query.algebra.Var; -import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; +import org.openrdf.query.algebra.evaluation.QueryBindingSet; +import org.openrdf.query.impl.BindingImpl; import org.openrdf.query.parser.ParsedTupleQuery; import org.openrdf.query.parser.sparql.SPARQLParser; import org.openrdf.sail.SailException; import com.google.common.base.Joiner; import com.google.common.base.Optional; +import com.google.common.base.Preconditions; import com.google.common.collect.HashBiMap; +import com.google.common.collect.HashMultimap; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; import com.google.common.collect.Sets; -import info.aduna.iteration.CloseableIteration; -import mvm.rya.accumulo.precompQuery.AccumuloPcjQuery; -import mvm.rya.api.utils.IteratorWrapper; -import mvm.rya.indexing.PcjQuery; -import mvm.rya.rdftriplestore.evaluation.ExternalBatchingIterator; - /** * During query planning, this node is inserted into the parsed query to * represent part of the original query (a sub-query). This sub-query is the * value returned by {@link ExternalTupleSet#getTupleExpr()}. The results * associated with this sub-query are stored in an external Accumulo table, * where accCon and tablename are the associated {@link Connector} and table - * name. During evaluation, the portion of the query in - * {@link AccumuloIndexSet} is evaluated by scanning the external Accumulo - * table. This class is extremely useful for caching queries and reusing results - * from previous SPARQL queries. + * name. During evaluation, the portion of the query in {@link AccumuloIndexSet} + * is evaluated by scanning the external Accumulo table. This class is extremely + * useful for caching queries and reusing results from previous SPARQL queries. * <p> * * The the {@link TupleExpr} returned by {@link ExternalTupleSet#getTupleExpr()} @@ -89,73 +102,89 @@ import mvm.rya.rdftriplestore.evaluation.ExternalBatchingIterator; * of sub-queries. * */ -public class AccumuloIndexSet extends ExternalTupleSet implements ExternalBatchingIterator { - - private final Connector accCon; //connector to Accumulo table where results are stored - private final String tablename; //name of Accumulo table - private List<String> varOrder = null; // orders in which results are written to table - private final PcjTables pcj = new PcjTables(); - - @Override - public Map<String, Set<String>> getSupportedVariableOrders() { - return this.getSupportedVariableOrderMap(); - } - - @Override - public String getSignature() { - return "AccumuloIndexSet(" + tablename + ") : " + Joiner.on(", ").join(this.getTupleExpr().getBindingNames()); - } - - /** - * - * @param sparql - name of sparql query whose results will be stored in PCJ table - * @param accCon - connection to a valid Accumulo instance - * @param tablename - name of an existing PCJ table - * @throws MalformedQueryException - * @throws SailException - * @throws QueryEvaluationException - * @throws MutationsRejectedException - * @throws TableNotFoundException - */ - public AccumuloIndexSet(final String sparql, final Connector accCon, final String tablename) throws MalformedQueryException, SailException, QueryEvaluationException, - MutationsRejectedException, TableNotFoundException { - this.tablename = tablename; - this.accCon = accCon; - final SPARQLParser sp = new SPARQLParser(); - final ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery(sparql, null); - - final Optional<Projection> projection = new ParsedQueryUtil().findProjection(pq); - if(!projection.isPresent()) { - throw new MalformedQueryException("SPARQL query '" + sparql + "' does not contain a Projection."); - } - setProjectionExpr(projection.get()); - - Set<VariableOrder> orders = null; - try { +public class AccumuloIndexSet extends ExternalTupleSet implements + ExternalBatchingIterator { + + private final Connector accCon; // connector to Accumulo table where results + // are stored + private final String tablename; // name of Accumulo table + private List<String> varOrder = null; // orders in which results are written + // to table + private PcjTables pcj = new PcjTables(); + + @Override + public Map<String, Set<String>> getSupportedVariableOrders() { + return this.getSupportedVariableOrderMap(); + } + + @Override + public String getSignature() { + return "AccumuloIndexSet(" + tablename + ") : " + + Joiner.on(", ").join(this.getTupleExpr().getBindingNames()); + } + + /** + * + * @param sparql + * - name of sparql query whose results will be stored in PCJ + * table + * @param accCon + * - connection to a valid Accumulo instance + * @param tablename + * - name of an existing PCJ table + * @throws MalformedQueryException + * @throws SailException + * @throws QueryEvaluationException + * @throws MutationsRejectedException + * @throws TableNotFoundException + */ + public AccumuloIndexSet(String sparql, Connector accCon, + String tablename) throws MalformedQueryException, SailException, + QueryEvaluationException, MutationsRejectedException, + TableNotFoundException { + this.tablename = tablename; + this.accCon = accCon; + SPARQLParser sp = new SPARQLParser(); + ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery(sparql, null); + TupleExpr te = pq.getTupleExpr(); + Preconditions.checkArgument(PCJOptimizerUtilities.isPCJValid(te), + "TupleExpr is an invalid PCJ."); + + Optional<Projection> projection = new ParsedQueryUtil() + .findProjection(pq); + if (!projection.isPresent()) { + throw new MalformedQueryException("SPARQL query '" + sparql + + "' does not contain a Projection."); + } + setProjectionExpr(projection.get()); + Set<VariableOrder> orders = null; + try { orders = pcj.getPcjMetadata(accCon, tablename).getVarOrders(); } catch (final PcjException e) { e.printStackTrace(); } - varOrder = Lists.newArrayList(); - for(final VariableOrder var: orders) { - varOrder.add(var.toString()); - } - setLocalityGroups(tablename, accCon, varOrder); - this.setSupportedVariableOrderMap(varOrder); - } - - /** - * - * @param accCon - connection to a valid Accumulo instance - * @param tablename - name of an existing PCJ table - * @throws MalformedQueryException - * @throws SailException - * @throws QueryEvaluationException - * @throws MutationsRejectedException - * @throws TableNotFoundException - */ - public AccumuloIndexSet(final Connector accCon, final String tablename) + varOrder = Lists.newArrayList(); + for (final VariableOrder var : orders) { + varOrder.add(var.toString()); + } + setLocalityGroups(tablename, accCon, varOrder); + this.setSupportedVariableOrderMap(varOrder); + } + + /** + * + * @param accCon + * - connection to a valid Accumulo instance + * @param tablename + * - name of an existing PCJ table + * @throws MalformedQueryException + * @throws SailException + * @throws QueryEvaluationException + * @throws MutationsRejectedException + * @throws TableNotFoundException + */ + public AccumuloIndexSet(Connector accCon, String tablename) throws MalformedQueryException, SailException, QueryEvaluationException, MutationsRejectedException, TableNotFoundException { @@ -168,11 +197,12 @@ public class AccumuloIndexSet extends ExternalTupleSet implements ExternalBatchi this.tablename = tablename; this.accCon = accCon; - final SPARQLParser sp = new SPARQLParser(); - final ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery(meta.getSparql(), - null); + SPARQLParser sp = new SPARQLParser(); + ParsedTupleQuery pq = (ParsedTupleQuery) sp.parseQuery( + meta.getSparql(), null); + setProjectionExpr((Projection) pq.getTupleExpr()); - final Set<VariableOrder> orders = meta.getVarOrders(); + Set<VariableOrder> orders = meta.getVarOrders(); varOrder = Lists.newArrayList(); for (final VariableOrder var : orders) { @@ -185,34 +215,37 @@ public class AccumuloIndexSet extends ExternalTupleSet implements ExternalBatchi /** * returns size of table for query planning */ - @Override - public double cardinality() { - double cardinality = 0; - try { - cardinality = pcj.getPcjMetadata(accCon, tablename).getCardinality(); - } catch (final PcjException e) { + @Override + public double cardinality() { + double cardinality = 0; + try { + cardinality = pcj.getPcjMetadata(accCon, tablename) + .getCardinality(); + } catch (PcjException e) { e.printStackTrace(); } - return cardinality; - } - + return cardinality; + } - /** - * - * @param tableName - * @param conn - * @param groups - locality groups to be created - * - * Sets locality groups for more efficient scans - these are usually the variable - * orders in the table so that scans for specific orders are more efficient - */ - private void setLocalityGroups(final String tableName, final Connector conn, final List<String> groups) { + /** + * + * @param tableName + * @param conn + * @param groups + * - locality groups to be created + * + * Sets locality groups for more efficient scans - these are + * usually the variable orders in the table so that scans for + * specific orders are more efficient + */ + private void setLocalityGroups(String tableName, Connector conn, + List<String> groups) { final HashMap<String, Set<Text>> localityGroups = new HashMap<String, Set<Text>>(); for (int i = 0; i < groups.size(); i++) { final HashSet<Text> tempColumn = new HashSet<Text>(); tempColumn.add(new Text(groups.get(i))); - final String groupName = groups.get(i).replace(VAR_ORDER_DELIM, ""); + final String groupName = groups.get(i).replace(VALUE_DELIM, ""); localityGroups.put(groupName, tempColumn); } @@ -225,173 +258,344 @@ public class AccumuloIndexSet extends ExternalTupleSet implements ExternalBatchi } + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + BindingSet bindingset) throws QueryEvaluationException { + return this.evaluate(Collections.singleton(bindingset)); + } + /** + * Core evaluation method used during query evaluation - given a collection + * of binding set constraints, this method finds common binding labels + * between the constraints and table, uses those to build a prefix scan of + * the Accumulo table, and creates a solution binding set by iterating of + * the scan results. + * @param bindingset - collection of {@link BindingSet}s to be joined with PCJ + * @return - CloseableIteration over joined results + */ + @Override + public CloseableIteration<BindingSet, QueryEvaluationException> evaluate( + final Collection<BindingSet> bindingset) + throws QueryEvaluationException { + + if (bindingset.isEmpty()) { + return new IteratorWrapper<BindingSet, QueryEvaluationException>( + new HashSet<BindingSet>().iterator()); + } - @Override - public CloseableIteration<BindingSet,QueryEvaluationException> evaluate(final BindingSet bindingset) throws QueryEvaluationException { - return this.evaluate(Collections.singleton(bindingset)); - } - - /** - * Core evaluation method used during query evaluation - given a collection of binding set constraints, this - * method finds common binding labels between the constraints and table, uses those to build a prefix scan - * of the Accumulo table, and creates a solution binding set by iterating of the scan results. - */ - @Override - public CloseableIteration<BindingSet,QueryEvaluationException> evaluate(final Collection<BindingSet> bindingset) throws QueryEvaluationException { - String localityGroup = ""; - final Set<String> commonVars = Sets.newHashSet(); - // if bindingset is empty, there are no results, so return empty iterator - if (bindingset.isEmpty()) { - return new IteratorWrapper<BindingSet, QueryEvaluationException>(new HashSet<BindingSet>().iterator()); - } - //to build range prefix, find common vars of bindingset and PCJ bindings - else { - final BindingSet bs = bindingset.iterator().next(); - for (final String b : this.getTupleExpr().getAssuredBindingNames()) { - final Binding v = bs.getBinding(b); - if (v != null) { - commonVars.add(b); - } - } - } - //add any constant constraints to common vars to be used in range prefix - commonVars.addAll(getConstantConstraints()); - PcjQuery apq = null; - List<String> fullVarOrder = null; - String commonVarOrder = null; - try { - if (commonVars.size() > 0) { - commonVarOrder = getVarOrder(commonVars); - if(commonVarOrder == null) { - throw new IllegalStateException("Index does not support binding set!"); - } - fullVarOrder = Lists.newArrayList(prefixToOrder(commonVarOrder).split(VAR_ORDER_DELIM)); - //use varOrder and tableVarMap to set correct scan column - localityGroup = orderToLocGroup(fullVarOrder); - } else { - localityGroup = varOrder.get(0); - } - apq = new AccumuloPcjQuery(accCon, tablename); - final ValueMapVisitor vmv = new ValueMapVisitor(); - this.getTupleExpr().visit(vmv); - - List<String> commonVarOrderList = null; - if(commonVarOrder != null) { - commonVarOrderList = Lists.newArrayList(commonVarOrder.split(VAR_ORDER_DELIM)); - } else { - commonVarOrderList = new ArrayList<>(); - } - - return apq.queryPrecompJoin(commonVarOrderList, localityGroup, vmv.getValMap(), - HashBiMap.create(this.getTableVarMap()).inverse(), bindingset); - } catch(final TableNotFoundException e) { - throw new QueryEvaluationException(e); - } - } - - /** - * - * @param order - variable order as indicated by query - * @return - locality group or column family used in scan - this - * is just the variable order expressed in terms of the variables stored - * in the table - */ - private String orderToLocGroup(final List<String> order) { - String localityGroup = ""; - for (final String s : order) { - if (localityGroup.length() == 0) { - localityGroup = this.getTableVarMap().get(s); - } else { - localityGroup = localityGroup + VAR_ORDER_DELIM + this.getTableVarMap().get(s); - } - } - return localityGroup; - } - - /** - * - * @param order - prefix of a full variable order - * @return - full variable order that includes all variables whose values - * are stored in the table - used to obtain the locality group - */ - //given partial order of query vars, convert to PCJ vars and determine - //if converted partial order is a substring of a full var order of PCJ variables. - //if converted partial order is a prefix, convert corresponding full PCJ var order to query vars - private String prefixToOrder(String order) { - final Map<String, String> invMap = HashBiMap.create(this.getTableVarMap()).inverse(); - String[] temp = order.split(VAR_ORDER_DELIM); - //get order in terms of PCJ variables - for (int i = 0; i < temp.length; i++) { - temp[i] = this.getTableVarMap().get(temp[i]); - } - order = Joiner.on(VAR_ORDER_DELIM).join(temp); - for (final String s : varOrder) { - //verify that partial order is prefix of a PCJ varOrder - if (s.startsWith(order)) { - temp = s.split(VAR_ORDER_DELIM); - //convert full PCJ varOrder back to query varOrder - for (int i = 0; i < temp.length; i++) { - temp[i] = invMap.get(temp[i]); - } - return Joiner.on(VAR_ORDER_DELIM).join(temp); - } - } - throw new NoSuchElementException("Order is not a prefix of any locality group value!"); - } - - /** - * - * @param variables - * @return - string representation of the Set variables, in an order that is in the - * table - */ - private String getVarOrder(final Set<String> variables) { - final Map<String, Set<String>> varOrderMap = this.getSupportedVariableOrders(); - final Set<Map.Entry<String, Set<String>>> entries = varOrderMap.entrySet(); - for (final Map.Entry<String, Set<String>> e : entries) { - if (e.getValue().equals(variables)) { - return e.getKey(); - } - } - return null; - } - - /** - * @return - all constraints which correspond to variables - * in {@link AccumuloIndexSet#getTupleExpr()} which are set - * equal to a constant, but are non-constant in Accumulo table - */ - private Set<String> getConstantConstraints() { - final Map<String, String> tableMap = this.getTableVarMap(); - final Set<String> keys = tableMap.keySet(); - final Set<String> constants = Sets.newHashSet(); - for (final String s : keys) { - if (s.startsWith("-const-")) { - constants.add(s); - } - } - return constants; - } - - /** - * - * Extracts the values associated with constant labels in the query - * Used to create binding sets from range scan - */ - public class ValueMapVisitor extends QueryModelVisitorBase<RuntimeException> { - Map<String, org.openrdf.model.Value> valMap = Maps.newHashMap(); - public Map<String, org.openrdf.model.Value> getValMap() { - return valMap; - } - @Override - public void meet(final Var node) { - if (node.getName().startsWith("-const-")) { - valMap.put(node.getName(), node.getValue()); - } - } - } + List<BindingSet> crossProductBs = new ArrayList<>(); + Map<String, org.openrdf.model.Value> constantConstraints = new HashMap<>(); + Set<Range> hashJoinRanges = new HashSet<>(); + final Range EMPTY_RANGE = new Range("", true, "~", false); + Range crossProductRange = EMPTY_RANGE; + String localityGroupOrder = varOrder.get(0); + int maxPrefixLen = Integer.MIN_VALUE; + int prefixLen = 0; + int oldPrefixLen = 0; + Multimap<String, BindingSet> bindingSetHashMap = HashMultimap.create(); + HashJoinType joinType = HashJoinType.CONSTANT_JOIN_VAR; + Set<String> unAssuredVariables = Sets.difference(getTupleExpr().getBindingNames(), getTupleExpr().getAssuredBindingNames()); + boolean useColumnScan = false; + boolean isCrossProd = false; + boolean containsConstantConstraints = false; + BindingSet constants = getConstantConstraints(); + containsConstantConstraints = constants.size() > 0; -} + try { + for (BindingSet bs : bindingset) { + if (bindingset.size() == 1 && bs.size() == 0) { + // in this case, only single, empty bindingset, pcj node is + // first node in query plan - use full Range scan with + // column + // family set + useColumnScan = true; + } + // get common vars for PCJ - only use variables associated + // with assured Bindings + QueryBindingSet commonVars = new QueryBindingSet(); + for (String b : getTupleExpr().getAssuredBindingNames()) { + Binding v = bs.getBinding(b); + if (v != null) { + commonVars.addBinding(v); + } + } + // no common vars implies cross product + if (commonVars.size() == 0 && bs.size() != 0) { + crossProductBs.add(bs); + isCrossProd = true; + } + //get a varOrder from orders in PCJ table - use at least + //one common variable + BindingSetVariableOrder varOrder = getVarOrder( + commonVars.getBindingNames(), + constants.getBindingNames()); + + // update constant constraints not used in varOrder and + // update Bindings used to form range by removing unused + // variables + commonVars.addAll(constants); + if (commonVars.size() > varOrder.varOrderLen) { + Map<String, Value> valMap = getConstantValueMap(); + for (String s : new HashSet<String>(varOrder.unusedVars)) { + if (valMap.containsKey(s) + && !constantConstraints.containsKey(s)) { + constantConstraints.put(s, valMap.get(s)); + } + commonVars.removeBinding(s); + } + } + + if (containsConstantConstraints + && (useColumnScan || isCrossProd)) { + // only one range required in event of a cross product or + // empty BindingSet + // Range will either be full table Range or determined by + // constant constraints + if (crossProductRange == EMPTY_RANGE) { + crossProductRange = getRange(varOrder.varOrder, + commonVars); + localityGroupOrder = prefixToOrder(varOrder.varOrder); + } + } else if (!useColumnScan && !isCrossProd) { + // update ranges and add BindingSet to HashJoinMap if not a + // cross product + hashJoinRanges.add(getRange(varOrder.varOrder, commonVars)); + + prefixLen = varOrder.varOrderLen; + // check if common Variable Orders are changing between + // BindingSets (happens in case + // of Optional). If common variable set length changes from + // BindingSet to BindingSet + // update the HashJoinType to be VARIABLE_JOIN_VAR. + if (oldPrefixLen == 0) { + oldPrefixLen = prefixLen; + } else { + if (oldPrefixLen != prefixLen + && joinType == HashJoinType.CONSTANT_JOIN_VAR) { + joinType = HashJoinType.VARIABLE_JOIN_VAR; + } + oldPrefixLen = prefixLen; + } + // update max prefix len + if (prefixLen > maxPrefixLen) { + maxPrefixLen = prefixLen; + } + String key = getHashJoinKey(varOrder.varOrder, commonVars); + bindingSetHashMap.put(key, bs); + } + + isCrossProd = false; + } + + // create full Range scan iterator and set column family if empty + // collection or if cross product BindingSet exists and no hash join + // BindingSets + if ((useColumnScan || crossProductBs.size() > 0) + && bindingSetHashMap.size() == 0) { + // TODO doesn't use user specified Authorizations + Scanner scanner = accCon.createScanner(tablename, + new Authorizations()); + // cross product with no cross product constraints here + scanner.setRange(crossProductRange); + scanner.fetchColumnFamily(new Text(localityGroupOrder)); + return new PCJKeyToCrossProductBindingSetIterator(scanner, + crossProductBs, constantConstraints, unAssuredVariables, getTableVarMap()); + } else if ((useColumnScan || crossProductBs.size() > 0) + && bindingSetHashMap.size() > 0) { + + // in this case, both hash join BindingSets and cross product + // BindingSets exist + // create an iterator to evaluate cross product and an iterator + // for hash join, then combine + + List<CloseableIteration<BindingSet, QueryEvaluationException>> iteratorList = new ArrayList<>(); + + // create cross product iterator + // TODO doesn't use user specified Authorizations + Scanner scanner1 = accCon.createScanner(tablename, + new Authorizations()); + scanner1.setRange(crossProductRange); + scanner1.fetchColumnFamily(new Text(localityGroupOrder)); + iteratorList.add(new PCJKeyToCrossProductBindingSetIterator( + scanner1, crossProductBs, constantConstraints, unAssuredVariables, + getTableVarMap())); + + // create hash join iterator + // TODO doesn't use user specified Authorizations + BatchScanner scanner2 = accCon.createBatchScanner(tablename, + new Authorizations(), 10); + scanner2.setRanges(hashJoinRanges); + PCJKeyToJoinBindingSetIterator iterator = new PCJKeyToJoinBindingSetIterator( + scanner2, getTableVarMap(), maxPrefixLen); + iteratorList.add(new BindingSetHashJoinIterator( + bindingSetHashMap, iterator, unAssuredVariables, joinType)); + + // combine iterators + return new IteratorCombiner(iteratorList); + + } else { + // only hash join BindingSets exist + // TODO doesn't use user specified auths + BatchScanner scanner = accCon.createBatchScanner(tablename, + new Authorizations(), 10); + // only need to create hash join iterator + scanner.setRanges(hashJoinRanges); + PCJKeyToJoinBindingSetIterator iterator = new PCJKeyToJoinBindingSetIterator( + scanner, getTableVarMap(), maxPrefixLen); + return new BindingSetHashJoinIterator(bindingSetHashMap, + iterator, unAssuredVariables, joinType); + } + } catch (Exception e) { + throw new QueryEvaluationException(e); + } + } + + private String getHashJoinKey(String commonVarOrder, BindingSet bs) { + String[] commonVarArray = commonVarOrder.split(VAR_ORDER_DELIM); + String key = bs.getValue(commonVarArray[0]).toString(); + for (int i = 1; i < commonVarArray.length; i++) { + key = key + VALUE_DELIM + bs.getValue(commonVarArray[i]).toString(); + } + return key; + } + + private Range getRange(String commonVarOrder, BindingSet bs) + throws BindingSetConversionException { + AccumuloPcjSerializer converter = new AccumuloPcjSerializer(); + byte[] rangePrefix = new byte[0]; + rangePrefix = converter.convert(bs, new VariableOrder(commonVarOrder)); + return Range.prefix(new Text(rangePrefix)); + } + /** + * + * @param variableBindingNames + * - names corresponding to variables + * @param constantBindingNames + * - names corresponding to constant constraints + * @return - {@link BindingSetVariableOrder} object containing largest + * possible supported variable order built from variableBindingNames + * and constantBindingNames + */ + private BindingSetVariableOrder getVarOrder( + Set<String> variableBindingNames, Set<String> constantBindingNames) { + Map<String, Set<String>> varOrderMap = this + .getSupportedVariableOrders(); + Set<Map.Entry<String, Set<String>>> entries = varOrderMap.entrySet(); + + Set<String> variables; + if (variableBindingNames.size() == 0 + && constantBindingNames.size() == 0) { + return new BindingSetVariableOrder("", 0, new HashSet<String>()); + } else if (variableBindingNames.size() > 0 + && constantBindingNames.size() == 0) { + variables = variableBindingNames; + } else if (variableBindingNames.size() == 0 + && constantBindingNames.size() > 0) { + variables = constantBindingNames; + } else { + variables = Sets.union(variableBindingNames, constantBindingNames); + + String maxPrefix = null; + int maxPrefixLen = 0; + Set<String> minUnusedVariables = null; + + for (Map.Entry<String, Set<String>> e : entries) { + Set<String> value = e.getValue(); + if (maxPrefixLen < value.size() + && variables.containsAll(value) + && Sets.intersection(value, variableBindingNames) + .size() > 0) { + maxPrefixLen = value.size(); + maxPrefix = e.getKey(); + minUnusedVariables = Sets.difference(variables, value); + if (maxPrefixLen == variables.size()) { + break; + } + } + } + return new BindingSetVariableOrder(maxPrefix, maxPrefixLen, + minUnusedVariables); + } + String maxPrefix = null; + int maxPrefixLen = 0; + Set<String> minUnusedVariables = null; + + for (Map.Entry<String, Set<String>> e : entries) { + Set<String> value = e.getValue(); + if (maxPrefixLen < value.size() && variables.containsAll(value)) { + maxPrefixLen = value.size(); + maxPrefix = e.getKey(); + minUnusedVariables = Sets.difference(variables, value); + if (maxPrefixLen == variables.size()) { + break; + } + } + } + return new BindingSetVariableOrder(maxPrefix, maxPrefixLen, + minUnusedVariables); + } + + /** + * @return - all constraints which correspond to variables in + * {@link AccumuloIndexSet#getTupleExpr()} which are set equal to a + * constant, but are non-constant in Accumulo table + */ + private BindingSet getConstantConstraints() { + Map<String, String> tableMap = this.getTableVarMap(); + Set<String> keys = tableMap.keySet(); + + QueryBindingSet constants = new QueryBindingSet(); + for (String s : keys) { + if (s.startsWith("-const-")) { + constants.addBinding(new BindingImpl(s, getConstantValueMap() + .get(s))); + } + } + return constants; + } + + /** + * + * @param order - prefix of a full variable order + * @return - full variable order that includes all variables whose values + * are stored in the table - used to obtain the locality group + */ + //given partial order of query vars, convert to PCJ vars and determine + //if converted partial order is a substring of a full var order of PCJ variables. + //if converted partial order is a prefix, convert corresponding full PCJ var order to query vars + private String prefixToOrder(String order) { + final Map<String, String> invMap = HashBiMap.create(this.getTableVarMap()).inverse(); + String[] temp = order.split(VAR_ORDER_DELIM); + //get order in terms of PCJ variables + for (int i = 0; i < temp.length; i++) { + temp[i] = this.getTableVarMap().get(temp[i]); + } + order = Joiner.on(VAR_ORDER_DELIM).join(temp); + for (final String s : varOrder) { + //verify that partial order is prefix of a PCJ varOrder + if (s.startsWith(order)) { + return s; + } + } + throw new NoSuchElementException("Order is not a prefix of any locality group value!"); + } + + + private class BindingSetVariableOrder { + + Set<String> unusedVars; + int varOrderLen = 0; + String varOrder; + + public BindingSetVariableOrder(String varOrder, int len, + Set<String> unused) { + this.varOrder = varOrder; + this.varOrderLen = len; + this.unusedVars = unused; + } + + } + +}
http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java index ddf691d..b53dd66 100644 --- a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java +++ b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/ExternalTupleSet.java @@ -33,7 +33,9 @@ import org.openrdf.query.BindingSet; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.Projection; import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.Var; import org.openrdf.query.algebra.evaluation.impl.ExternalSet; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; import com.beust.jcommander.internal.Sets; import com.google.common.base.Joiner; @@ -57,9 +59,11 @@ public abstract class ExternalTupleSet extends ExternalSet { public static final String VAR_ORDER_DELIM = ";"; public static final String CONST_PREFIX = "-const-"; + public static final String VALUE_DELIM = "\u0000"; private Projection tupleExpr; private Map<String, String> tableVarMap = Maps.newHashMap(); //maps vars in tupleExpr to var in stored binding sets private Map<String, Set<String>> supportedVarOrders = Maps.newHashMap(); //indicates supported var orders + private Map<String, org.openrdf.model.Value> valMap; public ExternalTupleSet() { } @@ -67,6 +71,7 @@ public abstract class ExternalTupleSet extends ExternalSet { public ExternalTupleSet(Projection tupleExpr) { Preconditions.checkNotNull(tupleExpr); this.tupleExpr = tupleExpr; + valMap = getValMap(); updateTableVarMap(tupleExpr, tupleExpr); } @@ -100,6 +105,7 @@ public abstract class ExternalTupleSet extends ExternalSet { updateTableVarMap(tupleExpr, this.tupleExpr); } this.tupleExpr = tupleExpr; + valMap = getValMap(); if (supportedVarOrders.size() != 0) { updateSupportedVarOrderMap(); } @@ -128,10 +134,14 @@ public abstract class ExternalTupleSet extends ExternalSet { return supportedVarOrders; } + public Map<String, org.openrdf.model.Value> getConstantValueMap() { + return valMap; + } + @Override public ExternalSet clone() { final ExternalTupleSet clone = (ExternalTupleSet) super.clone(); - clone.tupleExpr = this.tupleExpr.clone(); + clone.setProjectionExpr(this.tupleExpr.clone()); clone.tableVarMap = Maps.newHashMap(); for(final String s: this.tableVarMap.keySet()) { clone.tableVarMap.put(s,this.tableVarMap.get(s)); @@ -152,7 +162,7 @@ public abstract class ExternalTupleSet extends ExternalSet { final Set<String> bNames = Sets.newHashSet(); final Set<String> bNamesWithConstants = Sets.newHashSet(); - for (final String s : this.getTupleExpr().getAssuredBindingNames()) { + for (final String s : this.getTupleExpr().getBindingNames()) { if (bindingNames.contains(s)) { bNames.add(s); bNamesWithConstants.add(s); @@ -267,4 +277,33 @@ public abstract class ExternalTupleSet extends ExternalSet { return result; } + private Map<String, org.openrdf.model.Value> getValMap() { + ValueMapVisitor valMapVis = new ValueMapVisitor(); + tupleExpr.visit(valMapVis); + return valMapVis.getValMap(); + } + + + /** + * + * Extracts the values associated with constant labels in the query Used to + * create binding sets from range scan + */ + private class ValueMapVisitor extends + QueryModelVisitorBase<RuntimeException> { + Map<String, org.openrdf.model.Value> valMap = Maps.newHashMap(); + + public Map<String, org.openrdf.model.Value> getValMap() { + return valMap; + } + + @Override + public void meet(Var node) { + if (node.getName().startsWith("-const-")) { + valMap.put(node.getName(), node.getValue()); + } + } + } + + } http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java index ca97014..2c5ef44 100644 --- a/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java +++ b/extras/indexing/src/main/java/mvm/rya/indexing/external/tupleSet/SimpleExternalTupleSet.java @@ -26,8 +26,6 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; -import mvm.rya.indexing.external.PrecompJoinOptimizer; - import org.openrdf.query.BindingSet; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.algebra.Projection; http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractPCJMatcher.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractPCJMatcher.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractPCJMatcher.java new file mode 100644 index 0000000..fdd9ccb --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractPCJMatcher.java @@ -0,0 +1,126 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import mvm.rya.indexing.external.tupleSet.ExternalTupleSet; +import mvm.rya.indexing.pcj.matching.QueryNodesToTupleExpr.TupleExprAndNodes; + +import org.openrdf.query.algebra.BinaryTupleOperator; +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.UnaryTupleOperator; + +/** + * This class provides implementations of methods common to all implementations of + * the {@link PCJMatcher} interface. + * + */ +public abstract class AbstractPCJMatcher implements PCJMatcher { + + protected QuerySegment segment; + protected List<QueryModelNode> segmentNodeList; + protected boolean tupleAndNodesUpToDate = false; + protected TupleExpr tuple; + protected Set<TupleExpr> unmatched; + protected PCJToSegment pcjToSegment; + protected Set<Filter> filters; + + /** + * @param - pcj - PremomputedJoin to be matched to a subset of segment + * @return - true if match occurs and false otherwise + */ + @Override + public boolean matchPCJ(ExternalTupleSet pcj) { + QuerySegment sgmnt = pcjToSegment.getSegment(pcj); + if(sgmnt == null) { + throw new IllegalArgumentException("PCJ must contain at east one Join or Left Join"); + } + return matchPCJ(sgmnt, pcj); + } + + /** + * In following method, order is determined by the order in which the + * node appear in the query. + * @return - an ordered view of the QueryModelNodes appearing tuple + * + */ + @Override + public List<QueryModelNode> getOrderedNodes() { + return Collections.unmodifiableList(segmentNodeList); + } + + + @Override + public Set<Filter> getFilters() { + if (!tupleAndNodesUpToDate) { + updateTupleAndNodes(); + } + return filters; + } + + @Override + public TupleExpr getQuery() { + if (!tupleAndNodesUpToDate) { + updateTupleAndNodes(); + } + return tuple; + } + + @Override + public Set<TupleExpr> getUnmatchedArgs() { + if (!tupleAndNodesUpToDate) { + updateTupleAndNodes(); + } + return unmatched; + } + + + private void updateTupleAndNodes() { + TupleExprAndNodes tupAndNodes = segment.getQuery(); + tuple = tupAndNodes.getTupleExpr(); + filters = tupAndNodes.getFilters(); + unmatched = new HashSet<>(); + List<QueryModelNode> nodes = tupAndNodes.getNodes(); + for (QueryModelNode q : nodes) { + if (q instanceof UnaryTupleOperator + || q instanceof BinaryTupleOperator) { + unmatched.add((TupleExpr) q); + } + } + tupleAndNodesUpToDate = true; + } + + + /** + * Interface for converting an {@link ExternalTupleSet} (PCJ) into a + * {@link QuerySegment}. + * + */ + interface PCJToSegment { + public QuerySegment getSegment(ExternalTupleSet pcj); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractQuerySegment.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractQuerySegment.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractQuerySegment.java new file mode 100644 index 0000000..2f1e749 --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/AbstractQuerySegment.java @@ -0,0 +1,122 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import mvm.rya.indexing.pcj.matching.QueryNodesToTupleExpr.TupleExprAndNodes; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.ValueExpr; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +/** + * This class provides implementations of methods common to implementations + * of the {@link QuerySegment} interface. + * + */ +public abstract class AbstractQuerySegment implements QuerySegment { + + + protected List<QueryModelNode> orderedNodes = new ArrayList<>(); + protected Set<QueryModelNode> unorderedNodes; + protected Map<ValueExpr, Filter> conditionMap = Maps.newHashMap(); + + /** + * Returns set view of nodes contained in the segment + */ + @Override + public Set<QueryModelNode> getUnOrderedNodes() { + return Collections.unmodifiableSet(unorderedNodes); + } + + /** + * Returns a list view of nodes contained in this segment, where order is + * determined by the getJoinArgs method + * + * @param TupleExpr + * from top to bottom. + */ + @Override + public List<QueryModelNode> getOrderedNodes() { + return Collections.unmodifiableList(orderedNodes); + } + + /** + * Allows nodes to be reordered using {@link PCJMatcher} and set + * @param nodes - reordering of orderedNodes + */ + @Override + public void setNodes(List<QueryModelNode> nodes) { + Set<QueryModelNode> nodeSet = Sets.newHashSet(nodes); + Preconditions.checkArgument(nodeSet.equals(unorderedNodes)); + orderedNodes = nodes; + unorderedNodes = nodeSet; + } + + /** + * @param query + * - QuerySegment that this method checks for in this + * JoinSegment + */ + @Override + public boolean containsQuerySegment(QuerySegment query) { + return unorderedNodes.containsAll(query.getUnOrderedNodes()); + } + + /** + * @return - a TupleExpr representing this JoinSegment + */ + @Override + public TupleExprAndNodes getQuery() { + List<QueryModelNode> nodeCopy = new ArrayList<>(); + for (QueryModelNode q : orderedNodes) { + if (!(q instanceof ValueExpr)) { + nodeCopy.add(q.clone()); + } + } + QueryNodesToTupleExpr qnt = new QueryNodesToTupleExpr(nodeCopy, getFilters()); + return qnt.getTupleAndNodes(); + } + + @Override + public Set<Filter> getFilters() { + Collection<Filter> filters = conditionMap.values(); + Set<Filter> filterSet = new HashSet<>(); + for (Filter filter : filters) { + filterSet.add(filter.clone()); + } + + return filterSet; + + } + + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/FlattenedOptional.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/FlattenedOptional.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/FlattenedOptional.java new file mode 100644 index 0000000..b4f8f28 --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/FlattenedOptional.java @@ -0,0 +1,331 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import mvm.rya.rdftriplestore.inference.DoNotExpandSP; +import mvm.rya.rdftriplestore.utils.FixedStatementPattern; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.QueryModelNodeBase; +import org.openrdf.query.algebra.QueryModelVisitor; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.ValueExpr; +import org.openrdf.query.algebra.Var; + +import com.google.common.collect.Sets; + +/** + * This class is essentially a wrapper for {@link LeftJoin}. It provides a + * flattened view of a LeftJoin that is useful for matching {@AccumuloIndexSet + * } nodes to sub-queries to use for Precomputed Joins. + * Because LeftJoins cannot automatically be interchanged with {@link Join}s and + * other LeftJoins in the query plan, this class has utility methods to check + * when nodes can be interchanged in the query plan. These methods track which + * variables returned by {@link LeftJoin#getRightArg()} are bound. A variable is + * bound if it also contained in the set returned by + * {@link LeftJoin#getLeftArg()}. Nodes can be interchanged with a LeftJoin (and + * hence a FlattenedOptional) so long as the bound and unbound variables do not + * change. + * + */ +public class FlattenedOptional extends QueryModelNodeBase implements TupleExpr { + + private Set<TupleExpr> rightArgs; + private Set<String> boundVars; + private Set<String> unboundVars; + private Map<String, Integer> leftArgVarCounts = new HashMap<String, Integer>(); + private ValueExpr condition; + private TupleExpr rightArg; + private Set<String> bindingNames; + private Set<String> assuredBindingNames; + + public FlattenedOptional(LeftJoin node) { + rightArgs = getJoinArgs(node.getRightArg(), new HashSet<TupleExpr>()); + boundVars = setWithOutConstants(Sets + .intersection(node.getLeftArg().getAssuredBindingNames(), node + .getRightArg().getBindingNames())); + unboundVars = setWithOutConstants(Sets.difference(node.getRightArg() + .getBindingNames(), boundVars)); + condition = node.getCondition(); + rightArg = node.getRightArg(); + getVarCounts(node); + assuredBindingNames = new HashSet<>(leftArgVarCounts.keySet()); + bindingNames = new HashSet<>(Sets.union(assuredBindingNames, + unboundVars)); + } + + public FlattenedOptional(FlattenedOptional optional) { + this.rightArgs = optional.rightArgs; + this.boundVars = optional.boundVars; + this.unboundVars = optional.unboundVars; + this.condition = optional.condition; + this.rightArg = optional.rightArg; + this.leftArgVarCounts = optional.leftArgVarCounts; + this.bindingNames = optional.bindingNames; + this.assuredBindingNames = optional.assuredBindingNames; + } + + public Set<TupleExpr> getRightArgs() { + return rightArgs; + } + + public TupleExpr getRightArg() { + return rightArg; + } + + /** + * + * @param te + * - TupleExpr to be added to leftarg of {@link LeftJoin} + */ + public void addArg(TupleExpr te) { + if (te instanceof FlattenedOptional) { + return; + } + incrementVarCounts(te.getBindingNames()); + } + + public void removeArg(TupleExpr te) { + if (te instanceof FlattenedOptional) { + return; + } + decrementVarCounts(te.getBindingNames()); + } + + /** + * + * @param te + * - {@link TupleExpr} to be added to leftArg of LeftJoin + * @return - true if adding TupleExpr does not affect unbound variables and + * returns false otherwise + */ + public boolean canAddTuple(TupleExpr te) { + // can only add LeftJoin if rightArg varNames do not intersect + // unbound vars + if (te instanceof FlattenedOptional) { + FlattenedOptional lj = (FlattenedOptional) te; + if (Sets.intersection(lj.rightArg.getBindingNames(), unboundVars) + .size() > 0) { + return false; + } else { + return true; + } + } + + return Sets.intersection(te.getBindingNames(), unboundVars).size() == 0; + } + + /** + * + * @param te + * - {@link TupleExpr} to be removed from leftArg of LeftJoin + * @return - true if removing TupleExpr does not affect bound variables and + * returns false otherwise + */ + public boolean canRemoveTuple(TupleExpr te) { + return canRemove(te); + } + + @Override + public Set<String> getBindingNames() { + return bindingNames; + } + + @Override + public Set<String> getAssuredBindingNames() { + return assuredBindingNames; + } + + public ValueExpr getCondition() { + return condition; + } + + @Override + public boolean equals(Object other) { + if (other instanceof FlattenedOptional) { + FlattenedOptional ljDec = (FlattenedOptional) other; + ValueExpr oCond = ljDec.getCondition(); + return nullEquals(condition, oCond) + && ljDec.getRightArgs().equals(rightArgs); + } + return false; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = prime + (rightArgs == null ? 0 : rightArgs.hashCode()); + result = prime * result + + (condition == null ? 0 : condition.hashCode()); + return result; + } + + /** + * This method is used to retrieve a set view of all descendants of the + * rightArg of the LeftJoin (the optional part) + * + * @param tupleExpr + * - tupleExpr whose args are being retrieved + * @param joinArgs + * - set view of all non-join args that are descendants of + * tupleExpr + * @return joinArgs + */ + private Set<TupleExpr> getJoinArgs(TupleExpr tupleExpr, + Set<TupleExpr> joinArgs) { + if (tupleExpr instanceof Join) { + if (!(((Join) tupleExpr).getLeftArg() instanceof FixedStatementPattern) + && !(((Join) tupleExpr).getRightArg() instanceof DoNotExpandSP)) { + Join join = (Join) tupleExpr; + getJoinArgs(join.getLeftArg(), joinArgs); + getJoinArgs(join.getRightArg(), joinArgs); + } + } else if (tupleExpr instanceof LeftJoin) { // TODO probably not + // necessary if not + // including leftarg + LeftJoin lj = (LeftJoin) tupleExpr; + joinArgs.add(new FlattenedOptional(lj)); + getJoinArgs(lj.getLeftArg(), joinArgs); + } else if (tupleExpr instanceof Filter) { + getJoinArgs(((Filter) tupleExpr).getArg(), joinArgs); + } else { + joinArgs.add(tupleExpr); + } + + return joinArgs; + } + + /** + * This method counts the number of times each variable appears in the + * leftArg of the LeftJoin defining this FlattenedOptional. This information + * is used to whether nodes can be moved out of the leftarg above the + * LeftJoin in the query. + * + * @param tupleExpr + */ + private void getVarCounts(TupleExpr tupleExpr) { + if (tupleExpr instanceof Join) { + Join join = (Join) tupleExpr; + getVarCounts(join.getLeftArg()); + getVarCounts(join.getRightArg()); + } else if (tupleExpr instanceof LeftJoin) { + LeftJoin lj = (LeftJoin) tupleExpr; + getVarCounts(lj.getLeftArg()); + } else if (tupleExpr instanceof Filter) { + getVarCounts(((Filter) tupleExpr).getArg()); + } else { + incrementVarCounts(tupleExpr.getBindingNames()); + } + } + + /** + * + * @param te + * - {@link TupleExpr} to be removed from leftArg of LeftJoin + * @return - true if removing te doesn't affect bounded variables of + * LeftJoin and false otherwise + */ + private boolean canRemove(TupleExpr te) { + // can only remove LeftJoin if right varNames do not intersect + // unbound vars + if (te instanceof FlattenedOptional) { + FlattenedOptional lj = (FlattenedOptional) te; + if (Sets.intersection(lj.getRightArg().getBindingNames(), + unboundVars).size() > 0) { + return false; + } else { + return true; + } + } + Set<String> vars = te.getBindingNames(); + Set<String> intersection = Sets.intersection(vars, boundVars); + if (intersection.size() == 0) { + return true; + } + for (String s : intersection) { + if (leftArgVarCounts.containsKey(s) && leftArgVarCounts.get(s) == 1) { + return false; + } + } + return true; + } + + private void incrementVarCounts(Set<String> vars) { + for (String s : vars) { + if (!s.startsWith("-const-") && leftArgVarCounts.containsKey(s)) { + leftArgVarCounts.put(s, leftArgVarCounts.get(s) + 1); + } else if (!s.startsWith("-const-")) { + leftArgVarCounts.put(s, 1); + } + } + } + + private void decrementVarCounts(Set<String> vars) { + for (String s : vars) { + if (leftArgVarCounts.containsKey(s) && leftArgVarCounts.get(s) > 1) { + leftArgVarCounts.put(s, leftArgVarCounts.get(s) - 1); + } else { + leftArgVarCounts.remove(s); + bindingNames.remove(s); + assuredBindingNames.remove(s); + } + } + } + + /** + * + * @param vars + * - set of {@link Var} names, possibly contained constants + */ + private Set<String> setWithOutConstants(Set<String> vars) { + Set<String> copy = new HashSet<>(); + for (String s : vars) { + if (!s.startsWith("-const-")) { + copy.add(s); + } + } + + return copy; + } + + @Override + public <X extends Exception> void visit(QueryModelVisitor<X> visitor) + throws X { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "FlattenedOptional: " + rightArgs; + } + + @Override + public FlattenedOptional clone() { + return new FlattenedOptional(this); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegment.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegment.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegment.java new file mode 100644 index 0000000..30f36cf --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegment.java @@ -0,0 +1,130 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.List; +import java.util.Set; + +import mvm.rya.indexing.external.tupleSet.ExternalTupleSet; +import mvm.rya.rdftriplestore.inference.DoNotExpandSP; +import mvm.rya.rdftriplestore.utils.FixedStatementPattern; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.ValueExpr; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; + +/** + * This class represents a portion of a {@link TupleExpr} query that PCJ queries + * are compared to. A JoinSegment is comprised of a collection of + * {@link QueryModelNode}s that are connected by {@link Join}s. In the case, the + * QueryModelNodes can commute within the JoinSegment, which makes JoinSegments + * a natural way to partition a query for PCJ matching. A query is decomposed + * into JoinSegments and PCJ queries can easily be compared to the {@link QueryModelNode}s + * contained in the segment using set operations. + * + */ +public class JoinSegment extends AbstractQuerySegment { + + public JoinSegment(Join join) { + Preconditions.checkNotNull(join); + createJoinSegment(join); + } + + public JoinSegment(Filter filter) { + Preconditions.checkNotNull(filter); + createJoinSegment(filter); + } + + private void createJoinSegment(TupleExpr te) { + orderedNodes = getJoinArgs(te, orderedNodes); + unorderedNodes = Sets.newHashSet(orderedNodes); + } + + /** + * This method matches the ordered nodes returned by + * {@link JoinSegment#getOrderedNodes()} for nodeToReplace with a subset of + * the ordered nodes for this JoinSegment. The order of the nodes for + * nodeToReplace must match the order of the nodes as a subset of + * orderedNodes + * + * @param nodeToReplace + * - nodes to be replaced by pcj + * @param pcj + * - pcj node that will replace specified query nodes + */ + @Override + public boolean replaceWithPcj(QuerySegment nodeToReplace, + ExternalTupleSet pcj) { + Preconditions.checkNotNull(nodeToReplace != null); + Preconditions.checkNotNull(pcj); + if (!containsQuerySegment(nodeToReplace)) { + return false; + } + Set<QueryModelNode> nodeSet = nodeToReplace.getUnOrderedNodes(); + orderedNodes.removeAll(nodeSet); + orderedNodes.add(pcj); + unorderedNodes.removeAll(nodeSet); + unorderedNodes.add(pcj); + for (QueryModelNode q : nodeSet) { + if (q instanceof ValueExpr) { + conditionMap.remove(q); + } + } + return true; + } + + /** + * + * @param tupleExpr + * - the query object that will be traversed by this method + * @param joinArgs + * - all nodes connected by Joins and Filters + * @return - List containing all nodes connected by Joins, LeftJoins, and + * Filters. This List contains the + * @param ValueExpr + * in place of the Filter + */ + private List<QueryModelNode> getJoinArgs(TupleExpr tupleExpr, + List<QueryModelNode> joinArgs) { + + if (tupleExpr instanceof Join) { + if (!(((Join) tupleExpr).getLeftArg() instanceof FixedStatementPattern) + && !(((Join) tupleExpr).getRightArg() instanceof DoNotExpandSP)) { + Join join = (Join) tupleExpr; + getJoinArgs(join.getRightArg(), joinArgs); + getJoinArgs(join.getLeftArg(), joinArgs); + } + } else if (tupleExpr instanceof Filter) { + Filter filter = (Filter) tupleExpr; + joinArgs.add(filter.getCondition()); + conditionMap.put(filter.getCondition(), filter); + getJoinArgs(filter.getArg(), joinArgs); + } else { + joinArgs.add(tupleExpr); + } + return joinArgs; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegmentPCJMatcher.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegmentPCJMatcher.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegmentPCJMatcher.java new file mode 100644 index 0000000..29c4188 --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/JoinSegmentPCJMatcher.java @@ -0,0 +1,101 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.ArrayList; + +import mvm.rya.indexing.external.tupleSet.ExternalTupleSet; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +/** + * This class is responsible for matching PCJ nodes with subsets of the + * {@link QueryModelNode}s found in {@link JoinSegment}s. Each PCJ is reduced to + * a bag of QueryModelNodes and set operations can be used to determine if the + * PCJ is a subset of the JoinSegment. If it is a subset, the PCJ node replaces + * the QueryModelNodes in the JoinSegment. + * + */ + +public class JoinSegmentPCJMatcher extends AbstractPCJMatcher { + + public JoinSegmentPCJMatcher(Join join) { + segment = new JoinSegment(join); + segmentNodeList = new ArrayList<>(segment.getOrderedNodes()); + pcjToSegment = new PCJToJoinSegment(); + } + + public JoinSegmentPCJMatcher(Filter filter) { + segment = new JoinSegment(filter); + segmentNodeList = new ArrayList<>(segment.getOrderedNodes()); + pcjToSegment = new PCJToJoinSegment(); + } + + /** + * @param pcjNodes + * - {@link QueryModelNode}s to be replaced + * @param pcj + * - the PCJ node to be compared to pcjNodes + */ + @Override + public boolean matchPCJ(QuerySegment pcjNodes, ExternalTupleSet pcj) { + boolean nodesReplaced = segment.replaceWithPcj(pcjNodes, pcj); + if (nodesReplaced) { + tupleAndNodesUpToDate = false; + segmentNodeList = segment.getOrderedNodes(); + } + + return nodesReplaced; + } + + /** + * This class extracts the {@link JoinSegment} from the {@link TupleExpr} of + * specified PCJ. + * + */ + static class PCJToJoinSegment extends + QueryModelVisitorBase<RuntimeException> implements PCJToSegment { + + private JoinSegment segment; + + @Override + public QuerySegment getSegment(ExternalTupleSet pcj) { + segment = null; + pcj.getTupleExpr().visit(this); + return segment; + } + + @Override + public void meet(Join join) { + segment = new JoinSegment(join); + } + + @Override + public void meet(Filter filter) { + segment = new JoinSegment(filter); + } + + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegment.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegment.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegment.java new file mode 100644 index 0000000..ebe5243 --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegment.java @@ -0,0 +1,146 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.List; + +import mvm.rya.indexing.external.tupleSet.ExternalTupleSet; +import mvm.rya.rdftriplestore.inference.DoNotExpandSP; +import mvm.rya.rdftriplestore.utils.FixedStatementPattern; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; +import org.openrdf.query.algebra.ValueExpr; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; + +/** + * An OptionalJoinSegment represents the portion of a {@link TupleExpr} that is + * connected by Filters, LeftJoins, and Joins. All nodes in the portion of the + * TupleExpr that are connected via these node types are gathered into an + * ordered and an unordered list that can easily be compared with + * {@link ExternalTupleSet} nodes for sub-query matching to use with Precomputed + * Joins. + * + */ +public class OptionalJoinSegment extends AbstractQuerySegment { + + public OptionalJoinSegment(Join join) { + Preconditions.checkNotNull(join); + createJoinSegment(join); + } + + public OptionalJoinSegment(LeftJoin join) { + Preconditions.checkNotNull(join); + createJoinSegment(join); + } + + public OptionalJoinSegment(Filter filter) { + Preconditions.checkNotNull(filter); + createJoinSegment(filter); + } + + private void createJoinSegment(TupleExpr te) { + orderedNodes = getJoinArgs(te, orderedNodes); + unorderedNodes = Sets.newHashSet(orderedNodes); + } + + /** + * This method matches the ordered nodes returned by + * {@link JoinSegment #getOrderedNodes()} for nodeToReplace with a subset of + * the ordered nodes for this JoinSegment. The order of the nodes for + * nodeToReplace must match the order of the nodes as a subset of + * orderedNodes + * + * @param nodeToReplace + * - nodes to be replaced by pcj + * @param pcj + * - pcj node that will replace specified query nodes + */ + @Override + public boolean replaceWithPcj(QuerySegment nodeToReplace, + ExternalTupleSet pcj) { + Preconditions.checkNotNull(nodeToReplace != null); + Preconditions.checkNotNull(pcj); + if (!containsQuerySegment(nodeToReplace)) { + return false; + } + List<QueryModelNode> nodeList = nodeToReplace.getOrderedNodes(); + int begin = orderedNodes.indexOf(nodeList.get(0)); + // TODO this assumes no duplicate nodes + if (begin < 0 + || begin + nodeList.size() > orderedNodes.size() + || !nodeList.equals(orderedNodes.subList(begin, begin + + nodeList.size()))) { + return false; + } + orderedNodes.removeAll(nodeList); + orderedNodes.add(begin, pcj); + unorderedNodes.removeAll(nodeList); + unorderedNodes.add(pcj); + for (QueryModelNode q : nodeList) { + if (q instanceof ValueExpr) { + conditionMap.remove(q); + } + } + return true; + } + + /** + * + * @param tupleExpr + * - the query object that will be traversed by this method + * @param joinArgs + * - all nodes connected by Joins, LeftJoins, and Filters + * @return - List containing all nodes connected by Joins, LeftJoins, and + * Filters. This List contains the {@link ValueExpr} in place of the + * Filter and a {@link FlattenedOptional} in place of the LeftJoin + * for ease of comparison with PCJ nodes. + */ + private List<QueryModelNode> getJoinArgs(TupleExpr tupleExpr, + List<QueryModelNode> joinArgs) { + + if (tupleExpr instanceof Join) { + if (!(((Join) tupleExpr).getLeftArg() instanceof FixedStatementPattern) + && !(((Join) tupleExpr).getRightArg() instanceof DoNotExpandSP)) { + Join join = (Join) tupleExpr; + getJoinArgs(join.getRightArg(), joinArgs); + getJoinArgs(join.getLeftArg(), joinArgs); + } + } else if (tupleExpr instanceof LeftJoin) { + LeftJoin lj = (LeftJoin) tupleExpr; + joinArgs.add(new FlattenedOptional(lj)); + getJoinArgs(lj.getLeftArg(), joinArgs); + } else if (tupleExpr instanceof Filter) { + Filter filter = (Filter) tupleExpr; + joinArgs.add(filter.getCondition()); + conditionMap.put(filter.getCondition(), filter); + getJoinArgs(filter.getArg(), joinArgs); + } else { + joinArgs.add(tupleExpr); + } + return joinArgs; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegmentPCJMatcher.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegmentPCJMatcher.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegmentPCJMatcher.java new file mode 100644 index 0000000..37f6867 --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/OptionalJoinSegmentPCJMatcher.java @@ -0,0 +1,142 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.ArrayList; +import java.util.List; + +import mvm.rya.indexing.external.tupleSet.ExternalTupleSet; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; + +/** + * This class matches PCJ queries to sub-queries of a given + * {@link OptionalJoinSegment}. A match will occur when the + * {@link QueryModelNode}s of the PCJ can be grouped together + * in the OptionalJoinSegment and ordered to match the PCJ query. + * + */ + +public class OptionalJoinSegmentPCJMatcher extends AbstractPCJMatcher { + + public OptionalJoinSegmentPCJMatcher(Join join) { + segment = new OptionalJoinSegment(join); + segmentNodeList = new ArrayList<>(segment.getOrderedNodes()); + pcjToSegment = new PCJToOptionalJoinSegment(); + } + + public OptionalJoinSegmentPCJMatcher(LeftJoin join) { + segment = new OptionalJoinSegment(join); + segmentNodeList = new ArrayList<>(segment.getOrderedNodes()); + pcjToSegment = new PCJToOptionalJoinSegment(); + } + + public OptionalJoinSegmentPCJMatcher(Filter filter) { + segment = new OptionalJoinSegment(filter); + segmentNodeList = new ArrayList<>(segment.getOrderedNodes()); + pcjToSegment = new PCJToOptionalJoinSegment(); + } + + /** + * @param pcjNodes - {@link QuerySegment} to be replaced by PCJ + * @param pcj - PCJ to replace matchin QuerySegment + */ + @Override + public boolean matchPCJ(QuerySegment pcjNodes, ExternalTupleSet pcj) { + + if(!segment.containsQuerySegment(pcjNodes)) { + return false; + } + List<QueryModelNode> consolidatedNodes = groupNodesToMatchPCJ(getOrderedNodes(), pcjNodes.getOrderedNodes()); + if(consolidatedNodes.size() == 0) { + return false; + } + + //set segment nodes to the consolidated nodes to match pcj + segment.setNodes(consolidatedNodes); + boolean nodesReplaced = segment.replaceWithPcj(pcjNodes, pcj); + + //if pcj nodes replaced queryNodes, update segmentNodeList + //otherwise restore segment nodes back to original pre-consolidated state + if(nodesReplaced) { + segmentNodeList = segment.getOrderedNodes(); + tupleAndNodesUpToDate = false; + } else { + segment.setNodes(segmentNodeList); + } + + return nodesReplaced; + } + + /** + * + * @param queryNodes - query nodes to be compared to pcj for matching + * @param pcjNodes - pcj nodes to match to query + * @return - query nodes with pcj nodes grouped together (if possible), otherwise return + * an empty list. + */ + private List<QueryModelNode> groupNodesToMatchPCJ(List<QueryModelNode> queryNodes, List<QueryModelNode> pcjNodes) { + PCJNodeConsolidator pnc = new PCJNodeConsolidator(queryNodes, pcjNodes); + boolean canConsolidate = pnc.consolidateNodes(); + if(canConsolidate) { + return pnc.getQueryNodes(); + } + return new ArrayList<QueryModelNode>(); + } + + + /** + * This class extracts the {@link OptionalJoinSegment} of PCJ query. + * + */ + static class PCJToOptionalJoinSegment extends QueryModelVisitorBase<RuntimeException> implements PCJToSegment { + + private OptionalJoinSegment segment; + + @Override + public QuerySegment getSegment(ExternalTupleSet pcj) { + segment = null; + pcj.getTupleExpr().visit(this); + return segment; + } + + @Override + public void meet(Join join) { + segment = new OptionalJoinSegment(join); + } + + @Override + public void meet(Filter filter) { + segment = new OptionalJoinSegment(filter); + } + + @Override + public void meet(LeftJoin node) { + segment = new OptionalJoinSegment(node); + } + + } + + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcher.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcher.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcher.java new file mode 100644 index 0000000..d98d367 --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcher.java @@ -0,0 +1,76 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.List; +import java.util.Set; + +import mvm.rya.indexing.external.tupleSet.ExternalTupleSet; + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.QueryModelNode; +import org.openrdf.query.algebra.TupleExpr; + +/** + * This interface provides a framework for matching PCJ {@link ExternalTupleSet}s + * to subsets of a given {@link QuerySegment}. + * + */ +public interface PCJMatcher { + + /** + * + * @param pcjNodes - QuerySegment representation of PCJ to be used for matching + * @param pcj - {@link ExternalTupleSet} used to replace matching PCJ nodes when match occurs + * @return - true is match and replace occurs and false otherwise + */ + public boolean matchPCJ(QuerySegment pcjNodes, ExternalTupleSet pcj); + + /** + * + * @param pcj - {@link ExternalTupleSet} used to replace matching PCJ nodes when match occurs + * @return - true is match and replace occurs and false otherwise + */ + public boolean matchPCJ(ExternalTupleSet pcj); + + /** + * @return - TupleExpr constructed from {@link QuerySegment} with matched nodes + */ + public TupleExpr getQuery(); + + /** + * + * @return - all {@link TupleExpr} that haven't been matched to a PCJ + */ + public Set<TupleExpr> getUnmatchedArgs(); + + /** + * + * @return - provided ordered view of QuerySegment nodes + */ + public List<QueryModelNode> getOrderedNodes(); + + /** + * + * @return - Set of {@link Filter}s of given QuerySegment + */ + public Set<Filter> getFilters(); + +} http://git-wip-us.apache.org/repos/asf/incubator-rya/blob/96dd55ec/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcherFactory.java ---------------------------------------------------------------------- diff --git a/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcherFactory.java b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcherFactory.java new file mode 100644 index 0000000..3a42a68 --- /dev/null +++ b/extras/indexing/src/main/java/mvm/rya/indexing/pcj/matching/PCJMatcherFactory.java @@ -0,0 +1,73 @@ +package mvm.rya.indexing.pcj.matching; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import org.openrdf.query.algebra.Filter; +import org.openrdf.query.algebra.Join; +import org.openrdf.query.algebra.LeftJoin; +import org.openrdf.query.algebra.Projection; +import org.openrdf.query.algebra.TupleExpr; + +/** + * This class takes in a given {@link Join}, {@Filter}, or {@link LeftJoin} + * and provides the appropriate {@link PCJMatcher} to match PCJs to the + * given query. + * + */ + +public class PCJMatcherFactory { + + public static PCJMatcher getPCJMatcher(Join join) { + if (segmentContainsLeftJoins(join)) { + return new OptionalJoinSegmentPCJMatcher(join); + } else { + return new JoinSegmentPCJMatcher(join); + } + } + + public static PCJMatcher getPCJMatcher(LeftJoin join) { + return new OptionalJoinSegmentPCJMatcher(join); + } + + public static PCJMatcher getPCJMatcher(Filter filter) { + if (segmentContainsLeftJoins(filter)) { + return new OptionalJoinSegmentPCJMatcher(filter); + } else { + return new JoinSegmentPCJMatcher(filter); + } + } + + private static boolean segmentContainsLeftJoins(TupleExpr tupleExpr) { + + if (tupleExpr instanceof Projection) { + return segmentContainsLeftJoins(((Projection) tupleExpr).getArg()); + } else if (tupleExpr instanceof Join) { + Join join = (Join) tupleExpr; + return segmentContainsLeftJoins(join.getRightArg()) + || segmentContainsLeftJoins(join.getLeftArg()); + } else if (tupleExpr instanceof LeftJoin) { + return true; + } else if (tupleExpr instanceof Filter) { + return segmentContainsLeftJoins(((Filter) tupleExpr).getArg()); + } else { + return false; + } + } +}
