Repository: marmotta Updated Branches: refs/heads/develop becba188a -> 564282f0a
working implementation of MINUS (by translating to NOT EXISTS), fixes MARMOTTA-541 Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/564282f0 Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/564282f0 Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/564282f0 Branch: refs/heads/develop Commit: 564282f0ae3068686373a44d91971aecb490096d Parents: becba18 Author: Sebastian Schaffert <[email protected]> Authored: Fri Sep 19 12:09:19 2014 +0200 Committer: Sebastian Schaffert <[email protected]> Committed: Fri Sep 19 12:09:19 2014 +0200 ---------------------------------------------------------------------- .../kiwi/sparql/builder/ExtensionFinder.java | 4 +- .../kiwi/sparql/builder/SQLBuilder.java | 22 ++++- .../kiwi/sparql/builder/SQLVariable.java | 13 ++- .../sparql/optimizer/DifferenceOptimizer.java | 93 ++++++++++++++++++++ .../sparql/sail/KiWiSparqlSailConnection.java | 4 + .../kiwi/sparql/test/KiWiSparqlJoinTest.java | 1 - .../marmotta/kiwi/sparql/test/query32.sparql | 2 +- 7 files changed, 134 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/marmotta/blob/564282f0/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ExtensionFinder.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ExtensionFinder.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ExtensionFinder.java index 2012f16..191a275 100644 --- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ExtensionFinder.java +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/ExtensionFinder.java @@ -42,6 +42,9 @@ public class ExtensionFinder extends QueryModelVisitorBase<RuntimeException> { @Override public void meet(Extension node) throws RuntimeException { + // visit children before, as there might be dependencies + super.meet(node); + for(ExtensionElem elem : node.getElements()) { if(elem.getExpr() instanceof Var && ((Var) elem.getExpr()).getName().equals(elem.getName())) { log.debug("ignoring self-aliasing of variable {}", elem.getName()); @@ -49,7 +52,6 @@ public class ExtensionFinder extends QueryModelVisitorBase<RuntimeException> { elements.add(elem); } } - super.meet(node); } @Override http://git-wip-us.apache.org/repos/asf/marmotta/blob/564282f0/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java index 52e85b6..8790edf 100644 --- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLBuilder.java @@ -326,6 +326,7 @@ public class SQLBuilder { // add all extensions to the variable list so they are properly considered in projections and clauses + // TODO: order by variable dependency, or otherwise the evaluateExpression might fail for(ExtensionElem ext : extensions) { Var v = new Var(ext.getName()); @@ -724,7 +725,7 @@ public class SQLBuilder { // TODO: need to make sure that variables of the parent are visible in the subquery // - pattern names need to be unique even in subqueries // - variable lookup for expressions in the subquery need to refer to the parent - SQLBuilder sq_builder = new SQLBuilder(((Exists) expr).getSubQuery(), bindings, dataset, converter, dialect, "_", Collections.EMPTY_SET, variables); + SQLBuilder sq_builder = new SQLBuilder(((Exists) expr).getSubQuery(), bindings, dataset, converter, dialect, "_", Collections.EMPTY_SET, copyVariables(variables)); return "EXISTS (" + sq_builder.build() + ")"; } else if(expr instanceof Str) { @@ -962,6 +963,25 @@ public class SQLBuilder { throw new IllegalArgumentException("unsupported value expression: "+expr); } + /** + * Copy variables from the set to a new set suitable for a subquery; this allows passing over variable expressions + * from parent queries to subqueries without the subquery adding expressions that are then not visible outside + * @param variables + * @return + */ + private Map<String, SQLVariable> copyVariables(Map<String, SQLVariable> variables) { + Map<String,SQLVariable> copy = new HashMap<>(); + try { + for(Map.Entry<String,SQLVariable> entry : variables.entrySet()) { + copy.put(entry.getKey(), (SQLVariable) entry.getValue().clone()); + } + } catch (CloneNotSupportedException e) { + log.error("could not clone SQL variable:",e); + } + + return copy; + } + private String castExpression(String arg, OPTypes type) { if(type == null) { return arg; http://git-wip-us.apache.org/repos/asf/marmotta/blob/564282f0/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLVariable.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLVariable.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLVariable.java index 6fcd3d8..bd1f85e 100644 --- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLVariable.java +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/builder/SQLVariable.java @@ -30,7 +30,7 @@ import java.util.List; * * @author Sebastian Schaffert ([email protected]) */ -public class SQLVariable { +public class SQLVariable implements Cloneable{ /** * A map for mapping the SPARQL variable names to internal names used for constructing SQL aliases. @@ -155,4 +155,15 @@ public class SQLVariable { return Collator.getInstance().compare(l.getName(), r.getName()); } }; + + @Override + protected Object clone() throws CloneNotSupportedException { + SQLVariable clone = new SQLVariable(getName(), getSparqlName()); + clone.projectionType = projectionType; + clone.getExpressions().addAll(expressions); + clone.getAliases().addAll(aliases); + clone.getBindings().addAll(bindings); + + return clone; + } } http://git-wip-us.apache.org/repos/asf/marmotta/blob/564282f0/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/optimizer/DifferenceOptimizer.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/optimizer/DifferenceOptimizer.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/optimizer/DifferenceOptimizer.java new file mode 100644 index 0000000..b6f05f9 --- /dev/null +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/optimizer/DifferenceOptimizer.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.marmotta.kiwi.sparql.optimizer; + +import com.google.common.collect.Sets; +import org.openrdf.query.BindingSet; +import org.openrdf.query.Dataset; +import org.openrdf.query.algebra.*; +import org.openrdf.query.algebra.evaluation.QueryOptimizer; +import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashSet; +import java.util.Set; + +/** + * This optimizer replaces occurrences of Difference with constructs that can be translated to SQL more easily: + * - in case the left and right argument have shared variables, the right argument is replaced by a Filter with a NOT EXISTS subquery + * - in case the left and right argument have no shared variables, the right argument is ignored + * + * Note that this transformation does not respect the case 3 documented in the SPARQL standard regarding variable scoping + * of inner FILTERs (http://www.w3.org/TR/sparql11-query/#neg-notexists-minus). This is a border case and almost impossible to + * translate to SQL with its unification semantics. + * + * @author Sebastian Schaffert ([email protected]) + */ +public class DifferenceOptimizer implements QueryOptimizer { + + private static Logger log = LoggerFactory.getLogger(DifferenceOptimizer.class); + + @Override + public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) { + tupleExpr.visit(new DifferenceReplacer()); + } + + + private static class DifferenceReplacer extends QueryModelVisitorBase<RuntimeException> { + + @Override + public void meet(Difference node) throws RuntimeException { + Set<String> leftVars = new VariableFinder(node.getLeftArg()).variables; + Set<String> rightVars = new VariableFinder(node.getRightArg()).variables; + + if(Sets.intersection(leftVars,rightVars).size() > 0) { + // left and right share variables, so we replace with a FILTER with NOT EXISTS + log.debug("replacing SPARQL MINUS with a FILTER on NOT EXISTS, because there are shared variables"); + log.debug("AST before:\n{}", node); + + ValueExpr notExists = new Not(new Exists(node.getRightArg())); + Filter replacement = new Filter(node.getLeftArg(), notExists); + node.replaceWith(replacement); + + log.debug("AST after:\n{}", node); + } else { + // left and right do not share variables, so we replace with the left subquery + log.debug("replacing SPARQL MINUS with its left argument, because there are no variables"); + node.replaceWith(node.getLeftArg()); + } + } + } + + + private static class VariableFinder extends QueryModelVisitorBase<RuntimeException> { + Set<String> variables = new HashSet<>(); + + public VariableFinder(TupleExpr expr) { + expr.visit(this); + } + + @Override + public void meet(Var node) throws RuntimeException { + if(!node.isAnonymous() && !node.isConstant()) { + variables.add(node.getName()); + } + } + } +} http://git-wip-us.apache.org/repos/asf/marmotta/blob/564282f0/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/sail/KiWiSparqlSailConnection.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/sail/KiWiSparqlSailConnection.java b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/sail/KiWiSparqlSailConnection.java index b0954f0..bb634af 100644 --- a/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/sail/KiWiSparqlSailConnection.java +++ b/libraries/kiwi/kiwi-sparql/src/main/java/org/apache/marmotta/kiwi/sparql/sail/KiWiSparqlSailConnection.java @@ -22,6 +22,7 @@ import org.apache.marmotta.kiwi.sail.KiWiValueFactory; import org.apache.marmotta.kiwi.sparql.evaluation.KiWiEvaluationStatistics; import org.apache.marmotta.kiwi.sparql.evaluation.KiWiEvaluationStrategyImpl; import org.apache.marmotta.kiwi.sparql.evaluation.KiWiTripleSource; +import org.apache.marmotta.kiwi.sparql.optimizer.DifferenceOptimizer; import org.apache.marmotta.kiwi.sparql.optimizer.DistinctLimitOptimizer; import org.apache.marmotta.kiwi.sparql.persistence.KiWiSparqlConnection; import org.openrdf.query.BindingSet; @@ -84,6 +85,9 @@ public class KiWiSparqlSailConnection extends NotifyingSailConnectionWrapper { //new OrderLimitOptimizer().optimize(tupleExpr, dataset, bindings); new DistinctLimitOptimizer().optimize(tupleExpr, dataset, bindings); + // replace Difference with NOT EXISTS + new DifferenceOptimizer().optimize(tupleExpr,dataset,bindings); + log.debug("evaluating SPARQL query:\n {}", tupleExpr); return strategy.evaluate(tupleExpr, EmptyBindingSet.getInstance()); http://git-wip-us.apache.org/repos/asf/marmotta/blob/564282f0/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java b/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java index 9bc95e3..7fa9c6a 100644 --- a/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java +++ b/libraries/kiwi/kiwi-sparql/src/test/java/org/apache/marmotta/kiwi/sparql/test/KiWiSparqlJoinTest.java @@ -320,7 +320,6 @@ public class KiWiSparqlJoinTest { // minus @Test - @Ignore("not implemented yet, see MARMOTTA-541") public void testQuery32() throws Exception { testQuery("query32.sparql"); } http://git-wip-us.apache.org/repos/asf/marmotta/blob/564282f0/libraries/kiwi/kiwi-sparql/src/test/resources/org/apache/marmotta/kiwi/sparql/test/query32.sparql ---------------------------------------------------------------------- diff --git a/libraries/kiwi/kiwi-sparql/src/test/resources/org/apache/marmotta/kiwi/sparql/test/query32.sparql b/libraries/kiwi/kiwi-sparql/src/test/resources/org/apache/marmotta/kiwi/sparql/test/query32.sparql index cd6a6e5..58f8d98 100644 --- a/libraries/kiwi/kiwi-sparql/src/test/resources/org/apache/marmotta/kiwi/sparql/test/query32.sparql +++ b/libraries/kiwi/kiwi-sparql/src/test/resources/org/apache/marmotta/kiwi/sparql/test/query32.sparql @@ -19,7 +19,7 @@ PREFIX foaf: <http://xmlns.com/foaf/0.1/> PREFIX dc: <http://purl.org/dc/elements/1.1/> SELECT DISTINCT ?s WHERE { - ?s ?p ?o . + ?s a foaf:Person . MINUS { ?s foaf:name "Hans Meier" . }
