This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/main by this push:
     new 495d404a84 GH-2535: Always use hash joins when joining VALUES blocks
495d404a84 is described below

commit 495d404a8421d495513c9731cfda8b2d4ec78f2b
Author: Claus Stadler <[email protected]>
AuthorDate: Wed Jun 12 14:49:50 2024 +0200

    GH-2535: Always use hash joins when joining VALUES blocks
---
 .../jena/sparql/engine/main/JoinClassifier.java    | 89 ++++++++++++++++++++++
 .../apache/jena/sparql/algebra/TestClassify.java   | 30 +++++++-
 2 files changed, 116 insertions(+), 3 deletions(-)

diff --git 
a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java 
b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
index 3fd325bd87..9b1b13bb8d 100644
--- 
a/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
+++ 
b/jena-arq/src/main/java/org/apache/jena/sparql/engine/main/JoinClassifier.java
@@ -18,8 +18,10 @@
 
 package org.apache.jena.sparql.engine.main ;
 
+import java.util.Iterator;
 import java.util.Set ;
 
+import org.apache.jena.atlas.iterator.Iter;
 import org.apache.jena.atlas.lib.SetUtils ;
 import org.apache.jena.sparql.algebra.Op ;
 import org.apache.jena.sparql.algebra.OpVisitor ;
@@ -63,6 +65,12 @@ public class JoinClassifier
         // Lateral is different.
         if ( right instanceof OpLateral )   return false ;
 
+        // Do not linearize when effectively joining tables - i.e. force hash 
joins between tables.
+        Basis leftBasis = getBasis(left);
+        Basis rightBasis = getBasis(right);
+        if ( leftBasis == Basis.TABLE && rightBasis == Basis.TABLE )
+            return false ;
+
         // Assume something will not commute these later on.
         return check(left, right) ;
     }
@@ -235,4 +243,85 @@ public class JoinClassifier
             return false ;
         return op instanceof OpDistinct || op instanceof OpReduced || op 
instanceof OpProject || op instanceof OpList ;
     }
+
+    /** Enumeration of the primitive op types in the SPARQL algebra. */
+    private enum Basis {
+        PATTERN,
+        TABLE,
+        PFUNCTION
+    }
+
+    /**
+     * This method checks whether the given op is based on {@link OpTable} or 
{@link OpPropFunc}.
+     * If neither is the case then the result is {@link Basis#PATTERN}.
+     * <p>
+     * This method is called for each side of a join from {@link #isLinear(Op, 
Op)}.
+     * If that side of the join is a property function then {@link 
Basis#PFUNCTION} is returned.
+     *
+     * <p>
+     * The special handling of property functions is due to that
+     * OpJoin(TABLE, PFUNCTION) needs to be linearized to OpSequence(TABLE, 
PFUNCTION).
+     *
+     * <p>
+     * This method resolves OpExt to its effective op.
+     */
+    private static Basis getBasis(Op op) {
+        Basis result;
+        if (op instanceof OpTable) {
+            result = Basis.TABLE;
+        } else if (op instanceof OpPropFunc) {
+            result = Basis.PFUNCTION;
+        } else if (op instanceof OpExt) {
+            Op effectiveOp = ((OpExt)op).effectiveOp();
+            result = effectiveOp == null
+                    ? Basis.PATTERN // Assume pattern
+                    : getBasis(effectiveOp);
+        } else if (op instanceof Op1) {
+            result = getBasis(((Op1)op).getSubOp());
+        } else {
+            Iterator<Op> it = getSubOps(op);
+            if (!it.hasNext()) { // Op0 that is not a table (e.g. OpPath, 
OpQuad, OpTriple, ...)
+                result = Basis.PATTERN;
+            } else { // Op2, OpN
+                result = Basis.TABLE; // Start with table; if any argument 
evaluates to pattern then change to pattern.
+                while (it.hasNext()) {
+                    Op subOp = it.next();
+                    Basis contrib = getBasis(subOp);
+
+                    // Treat property functions in sub operations with more 
than one argument as tables
+                    if (contrib == Basis.PFUNCTION) {
+                        contrib = Basis.TABLE;
+                    }
+
+                    if (contrib != Basis.TABLE) {
+                        result = Basis.PATTERN;
+                        break;
+                    }
+                }
+            }
+        }
+        return result;
+    }
+
+    /***
+     * Return an iterator over the given op's immediate sub ops.
+     * An empty iterator is returned for Op0 and OpExt.
+     */
+    // XXX This method could go to OpLib or the Op interface directly
+    private static Iterator<Op> getSubOps(Op op) {
+        if (op instanceof Op1)
+            return Iter.singleton(((Op1)op).getSubOp());
+
+        if (op instanceof Op2) {
+            Op2 x = (Op2)op;
+            return Iter.of(x.getLeft(), x.getRight());
+        }
+
+        if (op instanceof OpN) {
+            return ((OpN)op).iterator();
+        }
+
+        // Op0 and OpExt are treated as having no sub ops
+        return Iter.empty();
+    }
 }
diff --git 
a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java 
b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
index 24010f89e3..a225f69e3c 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/algebra/TestClassify.java
@@ -179,10 +179,10 @@ public class TestClassify
         TestClassify.classifyJ(x1, false);
     }
 
-    /** Can linearize because rhs binds ?x*/
+    /** Could linearize because rhs binds ?x, however, tables on both sides 
prefers hash join */
     @Test public void testClassify_Join_70b() {
         String x1 = "{ VALUES ?x { UNDEF } VALUES ?x { 0 } }";
-        TestClassify.classifyJ(x1, true);
+        TestClassify.classifyJ(x1, false);
     }
 
     /** Can't linearize because rhs does not bind ?x */
@@ -203,9 +203,33 @@ public class TestClassify
         TestClassify.classifyJ(x1, false);
     }
 
-    /** Can linearize because rhs binds ?x*/
+    /** Could linearize because rhs binds ?x, however, unit tables (beneath 
BIND) on both sides prefers hash join */
     @Test public void testClassify_Join_82() {
         String x1 = "{ BIND('x' AS ?x) { BIND('y' AS ?x) FILTER(?x < 1) } }";
+        TestClassify.classifyJ(x1, false);
+    }
+
+    /** Can linearize because rhs binds ?z */
+    @Test public void testClassify_Join_90a() {
+        String x1 = "{ ?x ?y ?z VALUES ?z { 0 } }";
+        TestClassify.classifyJ(x1, true);
+    }
+
+    /** Can linearize because rhs binds ?z (bgps must bind mentioned 
variables) */
+    @Test public void testClassify_Join_90b() {
+        String x1 = "{ VALUES ?z { 0 } ?x ?y ?z }";
+        TestClassify.classifyJ(x1, true);
+    }
+
+    /** Can't linearize because rhs does not bind ?z */
+    @Test public void testClassify_Join_91a() {
+        String x1 = "{ ?x ?y ?z  VALUES ?z { UNDEF } }";
+        TestClassify.classifyJ(x1, false);
+    }
+
+    /** Can linearize because rhs binds ?z */
+    @Test public void testClassify_Join_91b() {
+        String x1 = "{ VALUES ?z { UNDEF } ?x ?y ?z }";
         TestClassify.classifyJ(x1, true);
     }
 

Reply via email to