This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new 495d404a84 GH-2535: Always use hash joins when joining VALUES blocks
495d404a84 is described below
commit 495d404a8421d495513c9731cfda8b2d4ec78f2b
Author: Claus Stadler <[email protected]>
AuthorDate: Wed Jun 12 14:49:50 2024 +0200
GH-2535: Always use hash joins when joining VALUES blocks
---
.../jena/sparql/engine/main/JoinClassifier.java | 89 ++++++++++++++++++++++
.../apache/jena/sparql/algebra/TestClassify.java | 30 +++++++-
2 files changed, 116 insertions(+), 3 deletions(-)
diff --git
a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
index 3fd325bd87..9b1b13bb8d 100644
---
a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
+++
b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
@@ -18,8 +18,10 @@
package org.apache.jena.sparql.engine.main ;
+import java.util.Iterator;
import java.util.Set ;
+import org.apache.jena.atlas.iterator.Iter;
import org.apache.jena.atlas.lib.SetUtils ;
import org.apache.jena.sparql.algebra.Op ;
import org.apache.jena.sparql.algebra.OpVisitor ;
@@ -63,6 +65,12 @@ public class JoinClassifier
// Lateral is different.
if ( right instanceof OpLateral ) return false ;
+ // Do not linearize when effectively joining tables - i.e. force hash
joins between tables.
+ Basis leftBasis = getBasis(left);
+ Basis rightBasis = getBasis(right);
+ if ( leftBasis == Basis.TABLE && rightBasis == Basis.TABLE )
+ return false ;
+
// Assume something will not commute these later on.
return check(left, right) ;
}
@@ -235,4 +243,85 @@ public class JoinClassifier
return false ;
return op instanceof OpDistinct || op instanceof OpReduced || op
instanceof OpProject || op instanceof OpList ;
}
+
+ /** Enumeration of the primitive op types in the SPARQL algebra. */
+ private enum Basis {
+ PATTERN,
+ TABLE,
+ PFUNCTION
+ }
+
+ /**
+ * This method checks whether the given op is based on {@link OpTable} or
{@link OpPropFunc}.
+ * If neither is the case then the result is {@link Basis#PATTERN}.
+ * <p>
+ * This method is called for each side of a join from {@link #isLinear(Op,
Op)}.
+ * If that side of the join is a property function then {@link
Basis#PFUNCTION} is returned.
+ *
+ * <p>
+ * The special handling of property functions is due to that
+ * OpJoin(TABLE, PFUNCTION) needs to be linearized to OpSequence(TABLE,
PFUNCTION).
+ *
+ * <p>
+ * This method resolves OpExt to its effective op.
+ */
+ private static Basis getBasis(Op op) {
+ Basis result;
+ if (op instanceof OpTable) {
+ result = Basis.TABLE;
+ } else if (op instanceof OpPropFunc) {
+ result = Basis.PFUNCTION;
+ } else if (op instanceof OpExt) {
+ Op effectiveOp = ((OpExt)op).effectiveOp();
+ result = effectiveOp == null
+ ? Basis.PATTERN // Assume pattern
+ : getBasis(effectiveOp);
+ } else if (op instanceof Op1) {
+ result = getBasis(((Op1)op).getSubOp());
+ } else {
+ Iterator<Op> it = getSubOps(op);
+ if (!it.hasNext()) { // Op0 that is not a table (e.g. OpPath,
OpQuad, OpTriple, ...)
+ result = Basis.PATTERN;
+ } else { // Op2, OpN
+ result = Basis.TABLE; // Start with table; if any argument
evaluates to pattern then change to pattern.
+ while (it.hasNext()) {
+ Op subOp = it.next();
+ Basis contrib = getBasis(subOp);
+
+ // Treat property functions in sub operations with more
than one argument as tables
+ if (contrib == Basis.PFUNCTION) {
+ contrib = Basis.TABLE;
+ }
+
+ if (contrib != Basis.TABLE) {
+ result = Basis.PATTERN;
+ break;
+ }
+ }
+ }
+ }
+ return result;
+ }
+
+ /***
+ * Return an iterator over the given op's immediate sub ops.
+ * An empty iterator is returned for Op0 and OpExt.
+ */
+ // XXX This method could go to OpLib or the Op interface directly
+ private static Iterator<Op> getSubOps(Op op) {
+ if (op instanceof Op1)
+ return Iter.singleton(((Op1)op).getSubOp());
+
+ if (op instanceof Op2) {
+ Op2 x = (Op2)op;
+ return Iter.of(x.getLeft(), x.getRight());
+ }
+
+ if (op instanceof OpN) {
+ return ((OpN)op).iterator();
+ }
+
+ // Op0 and OpExt are treated as having no sub ops
+ return Iter.empty();
+ }
}
diff --git
a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
index 24010f89e3..a225f69e3c 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
@@ -179,10 +179,10 @@ public class TestClassify
TestClassify.classifyJ(x1, false);
}
- /** Can linearize because rhs binds ?x*/
+ /** Could linearize because rhs binds ?x, however, tables on both sides
prefers hash join */
@Test public void testClassify_Join_70b() {
String x1 = "{ VALUES ?x { UNDEF } VALUES ?x { 0 } }";
- TestClassify.classifyJ(x1, true);
+ TestClassify.classifyJ(x1, false);
}
/** Can't linearize because rhs does not bind ?x */
@@ -203,9 +203,33 @@ public class TestClassify
TestClassify.classifyJ(x1, false);
}
- /** Can linearize because rhs binds ?x*/
+ /** Could linearize because rhs binds ?x, however, unit tables (beneath
BIND) on both sides prefers hash join */
@Test public void testClassify_Join_82() {
String x1 = "{ BIND('x' AS ?x) { BIND('y' AS ?x) FILTER(?x < 1) } }";
+ TestClassify.classifyJ(x1, false);
+ }
+
+ /** Can linearize because rhs binds ?z */
+ @Test public void testClassify_Join_90a() {
+ String x1 = "{ ?x ?y ?z VALUES ?z { 0 } }";
+ TestClassify.classifyJ(x1, true);
+ }
+
+ /** Can linearize because rhs binds ?z (bgps must bind mentioned
variables) */
+ @Test public void testClassify_Join_90b() {
+ String x1 = "{ VALUES ?z { 0 } ?x ?y ?z }";
+ TestClassify.classifyJ(x1, true);
+ }
+
+ /** Can't linearize because rhs does not bind ?z */
+ @Test public void testClassify_Join_91a() {
+ String x1 = "{ ?x ?y ?z VALUES ?z { UNDEF } }";
+ TestClassify.classifyJ(x1, false);
+ }
+
+ /** Can linearize because rhs binds ?z */
+ @Test public void testClassify_Join_91b() {
+ String x1 = "{ VALUES ?z { UNDEF } ?x ?y ?z }";
TestClassify.classifyJ(x1, true);
}