This is an automated email from the ASF dual-hosted git repository.
silun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 5a3526cb4b [CALCITE-7385] Support LEFT_MARK type for nested loop join
in enumerable convention
5a3526cb4b is described below
commit 5a3526cb4b5e161d379ba698d4da086556ea6eb6
Author: Silun Dong <[email protected]>
AuthorDate: Mon Jan 26 23:57:54 2026 +0800
[CALCITE-7385] Support LEFT_MARK type for nested loop join in enumerable
convention
---
.../adapter/enumerable/EnumerableJoinRule.java | 10 +-
.../enumerable/EnumerableNestedLoopJoin.java | 39 ++++++
.../org/apache/calcite/util/BuiltInMethod.java | 4 +
.../test/enumerable/EnumerableJoinTest.java | 146 +++++++++++++++++++++
.../apache/calcite/linq4j/DefaultEnumerable.java | 7 +
.../apache/calcite/linq4j/EnumerableDefaults.java | 55 ++++++++
.../apache/calcite/linq4j/ExtendedEnumerable.java | 13 ++
7 files changed, 265 insertions(+), 9 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableJoinRule.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableJoinRule.java
index 08a301da5a..57a6778e0d 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableJoinRule.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableJoinRule.java
@@ -21,14 +21,10 @@
import org.apache.calcite.rel.convert.ConverterRule;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinInfo;
-import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.logical.LogicalJoin;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
-import org.apache.calcite.util.Bug;
-
-import org.checkerframework.checker.nullness.qual.Nullable;
import java.util.ArrayList;
import java.util.Arrays;
@@ -52,7 +48,7 @@ protected EnumerableJoinRule(Config config) {
super(config);
}
- @Override public @Nullable RelNode convert(RelNode rel) {
+ @Override public RelNode convert(RelNode rel) {
Join join = (Join) rel;
List<RelNode> newInputs = new ArrayList<>();
for (RelNode input : join.getInputs()) {
@@ -96,10 +92,6 @@ protected EnumerableJoinRule(Config config) {
join.getVariablesSet(),
join.getJoinType());
}
- if (!Bug.TODO_FIXED && join.getJoinType() == JoinRelType.LEFT_MARK) {
- // TODO Support LEFT MARK type for nested loop join
- return null;
- }
return EnumerableNestedLoopJoin.create(
left,
right,
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableNestedLoopJoin.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableNestedLoopJoin.java
index de539ad574..545329726f 100644
---
a/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableNestedLoopJoin.java
+++
b/core/src/main/java/org/apache/calcite/adapter/enumerable/EnumerableNestedLoopJoin.java
@@ -150,6 +150,45 @@ public static EnumerableNestedLoopJoin create(
}
@Override public Result implement(EnumerableRelImplementor implementor,
Prefer pref) {
+ switch (joinType) {
+ case LEFT_MARK:
+ return implementNLMarkJoin(implementor, pref);
+ default:
+ return implementNLJoin(implementor, pref);
+ }
+ }
+
+ private Result implementNLMarkJoin(EnumerableRelImplementor implementor,
Prefer pref) {
+ final BlockBuilder builder = new BlockBuilder();
+ final Result leftResult =
+ implementor.visitChild(this, 0, (EnumerableRel) left, pref);
+ Expression leftExpression =
+ builder.append("left", leftResult.block);
+ final Result rightResult =
+ implementor.visitChild(this, 1, (EnumerableRel) right, pref);
+ Expression rightExpression =
+ builder.append("right", rightResult.block);
+ final PhysType physType =
+ PhysTypeImpl.of(implementor.getTypeFactory(),
+ getRowType(),
+ pref.preferArray());
+ final Expression predicate =
+ EnumUtils.generatePredicate(implementor, getCluster().getRexBuilder(),
left, right,
+ leftResult.physType, rightResult.physType, condition, true);
+ return implementor.result(
+ physType,
+ builder.append(
+ Expressions.call(
+ leftExpression,
+ BuiltInMethod.LEFT_MARK_NESTED_LOOP_JOIN.method,
+ Expressions.list(
+ rightExpression,
+ predicate,
+ EnumUtils.markJoinSelector(physType, leftResult.physType))))
+ .toBlock());
+ }
+
+ private Result implementNLJoin(EnumerableRelImplementor implementor, Prefer
pref) {
final BlockBuilder builder = new BlockBuilder();
final Result leftResult =
implementor.visitChild(this, 0, (EnumerableRel) left, pref);
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 8b96db847a..6adf1aa394 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -259,6 +259,10 @@ public enum BuiltInMethod {
EqualityComparer.class, Predicate2.class),
NESTED_LOOP_JOIN(EnumerableDefaults.class, "nestedLoopJoin",
Enumerable.class,
Enumerable.class, Predicate2.class, Function2.class, JoinType.class),
+ LEFT_MARK_NESTED_LOOP_JOIN(ExtendedEnumerable.class,
"leftMarkNestedLoopJoin",
+ Enumerable.class, // inner enumerable
+ NullablePredicate2.class, // non-equi predicate that can return NULL
+ Function2.class), // result selector
CORRELATE_JOIN(ExtendedEnumerable.class, "correlateJoin",
JoinType.class, Function1.class, Function2.class),
CORRELATE_BATCH_JOIN(EnumerableDefaults.class, "correlateBatchJoin",
diff --git
a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
index a93bd94692..56393e184a 100644
---
a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
+++
b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableJoinTest.java
@@ -24,12 +24,19 @@
import org.apache.calcite.interpreter.Bindables;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
+import org.apache.calcite.rel.rules.CoreRules;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.test.CalciteAssert;
import org.apache.calcite.test.schemata.hr.HierarchySchema;
import org.apache.calcite.test.schemata.hr.HrSchema;
import org.apache.calcite.test.schemata.hr.HrSchemaBig;
+import org.apache.calcite.tools.Program;
+import org.apache.calcite.tools.Programs;
+import org.apache.calcite.util.Holder;
+
+import com.google.common.collect.ImmutableList;
import org.junit.jupiter.api.Test;
@@ -440,6 +447,145 @@ private void
checkMergeJoinWithCompositeKeyAndNullValues(boolean bigSchema, Join
"empid=5; name=Emp5");
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-7385">[CALCITE-7385]
+ * Support LEFT_MARK type for nested loop join in enumerable convention</a>.
*/
+ @Test void testLeftMarkJoinBasedNestedLoop() {
+ Program subQuery =
+ Programs.hep(
+ ImmutableList.of(CoreRules.PROJECT_SUB_QUERY_TO_MARK_CORRELATE),
+ true,
+ DefaultRelMetadataProvider.INSTANCE);
+ Program subQueryWithoutMarkJoin =
+ Programs.hep(
+ ImmutableList.of(CoreRules.PROJECT_SUB_QUERY_TO_CORRELATE),
+ true,
+ DefaultRelMetadataProvider.INSTANCE);
+ Program toCalc =
+ Programs.hep(
+ ImmutableList.of(CoreRules.PROJECT_TO_CALC,
CoreRules.FILTER_TO_CALC,
+ CoreRules.CALC_MERGE),
+ true,
+ DefaultRelMetadataProvider.INSTANCE);
+ Program enumerableImpl =
Programs.ofRules(EnumerableRules.ENUMERABLE_RULES);
+
+ // case1: left mark join from uncorrelated SOME subquery
+ CalciteAssert.AssertQuery test1 =
+ tester(false, new HrSchema()).query(
+ "WITH t1(id) as (VALUES (1), (2), (NULL)), t2(id) as (VALUES (2),
(3)) "
+ + "select id, id >= SOME(select id from t2) as marker from
t1");
+ // result of new subquery removal and decorrelation algorithms
+ test1
+ .withHook(Hook.PROGRAM, (Consumer<Holder<Program>>) program -> {
+ program.set(Programs.sequence(subQuery, toCalc, enumerableImpl));
+ })
+ .explainHookMatches(
+ "EnumerableNestedLoopJoin(condition=[>=($0, $1)],
joinType=[left_mark])\n"
+ + " EnumerableValues(tuples=[[{ 1 }, { 2 }, { null }]])\n"
+ + " EnumerableCalc(expr#0=[{inputs}], id=[$t0])\n"
+ + " EnumerableValues(tuples=[[{ 2 }, { 3 }]])\n")
+ .returnsUnordered(
+ "id=1; marker=false",
+ "id=2; marker=true",
+ "id=null; marker=null");
+ // result of the old
+ test1
+ .withHook(Hook.PROGRAM, (Consumer<Holder<Program>>) program -> {
+ program.set(Programs.sequence(subQueryWithoutMarkJoin, toCalc,
enumerableImpl));
+ })
+ .returnsUnordered(
+ "id=1; marker=false",
+ "id=2; marker=true",
+ "id=null; marker=null");
+
+ // case2: left mark join whose condition is simplified to a NULL constant
+ CalciteAssert.AssertQuery test2 =
+ tester(false, new HrSchema()).query(
+ "WITH t1(id) as (VALUES (1), (2), (NULL)), t2(id) as (VALUES (2),
(3), (null)) "
+ + "select id, cast(null as int) = SOME(select cast(id as int)
as id from t2) "
+ + "as marker from t1");
+ // result of new subquery removal and decorrelation algorithms
+ test2
+ .withHook(Hook.PROGRAM, (Consumer<Holder<Program>>) program -> {
+ program.set(Programs.sequence(subQuery, toCalc, enumerableImpl));
+ })
+ .explainHookMatches(
+ "EnumerableNestedLoopJoin(condition=[null:BOOLEAN],
joinType=[left_mark])\n"
+ + " EnumerableValues(tuples=[[{ 1 }, { 2 }, { null }]])\n"
+ + " EnumerableCalc(expr#0=[{inputs}], id=[$t0])\n"
+ + " EnumerableValues(tuples=[[{ 2 }, { 3 }, { null }]])\n")
+ .returnsUnordered(
+ "id=1; marker=null",
+ "id=2; marker=null",
+ "id=null; marker=null");
+ // result of the old
+ test2
+ .withHook(Hook.PROGRAM, (Consumer<Holder<Program>>) program -> {
+ program.set(Programs.sequence(subQueryWithoutMarkJoin, toCalc,
enumerableImpl));
+ })
+ .returnsUnordered(
+ "id=1; marker=null",
+ "id=2; marker=null",
+ "id=null; marker=null");
+
+ // case3: left mark join from uncorrelated EXISTS subquery
+ CalciteAssert.AssertQuery test3 =
+ tester(false, new HrSchema()).query(
+ "WITH t1(id) as (VALUES (1), (2), (NULL)), t2(id) as (VALUES (2),
(3), (NULL)) "
+ + "select id, EXISTS(select id from t2) as marker from t1");
+ // result of new subquery removal and decorrelation algorithms
+ test3
+ .withHook(Hook.PROGRAM, (Consumer<Holder<Program>>) program -> {
+ program.set(Programs.sequence(subQuery, toCalc, enumerableImpl));
+ })
+ .explainHookMatches(
+ "EnumerableNestedLoopJoin(condition=[true],
joinType=[left_mark])\n"
+ + " EnumerableValues(tuples=[[{ 1 }, { 2 }, { null }]])\n"
+ + " EnumerableValues(tuples=[[{ 2 }, { 3 }, { null }]])\n")
+ .returnsUnordered(
+ "id=1; marker=true",
+ "id=2; marker=true",
+ "id=null; marker=true");
+ // result of the old
+ test3
+ .withHook(Hook.PROGRAM, (Consumer<Holder<Program>>) program -> {
+ program.set(Programs.sequence(subQueryWithoutMarkJoin, toCalc,
enumerableImpl));
+ })
+ .returnsUnordered(
+ "id=1; marker=true",
+ "id=2; marker=true",
+ "id=null; marker=true");
+
+ // case4: left mark join from uncorrelated EXISTS subquery that is empty
+ CalciteAssert.AssertQuery test4 =
+ tester(false, new HrSchema()).query(
+ "WITH t1(id) as (VALUES (1), (2), (NULL)), t2(id) as (VALUES (2),
(3), (NULL)) "
+ + "select id, EXISTS(select id from t2 where false) as marker
from t1");
+ // result of new subquery removal and decorrelation algorithms
+ test4
+ .withHook(Hook.PROGRAM, (Consumer<Holder<Program>>) program -> {
+ program.set(Programs.sequence(subQuery, toCalc, enumerableImpl));
+ })
+ .explainHookMatches(
+ "EnumerableNestedLoopJoin(condition=[true],
joinType=[left_mark])\n"
+ + " EnumerableValues(tuples=[[{ 1 }, { 2 }, { null }]])\n"
+ + " EnumerableCalc(expr#0=[{inputs}], expr#1=[false],
EXPR$0=[$t0], $condition=[$t1])\n"
+ + " EnumerableValues(tuples=[[{ 2 }, { 3 }, { null }]])\n")
+ .returnsUnordered(
+ "id=1; marker=false",
+ "id=2; marker=false",
+ "id=null; marker=false");
+ // result of the old
+ test4
+ .withHook(Hook.PROGRAM, (Consumer<Holder<Program>>) program -> {
+ program.set(Programs.sequence(subQueryWithoutMarkJoin, toCalc,
enumerableImpl));
+ })
+ .returnsUnordered(
+ "id=1; marker=false",
+ "id=2; marker=false",
+ "id=null; marker=false");
+ }
+
private CalciteAssert.AssertThat tester(boolean forceDecorrelate,
Object schema) {
return CalciteAssert.that()
diff --git
a/linq4j/src/main/java/org/apache/calcite/linq4j/DefaultEnumerable.java
b/linq4j/src/main/java/org/apache/calcite/linq4j/DefaultEnumerable.java
index f90c9f112d..8a45548d31 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/DefaultEnumerable.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/DefaultEnumerable.java
@@ -448,6 +448,13 @@ protected OrderedQueryable<T> asOrderedQueryable() {
nonEquiPredicate, equiPredicate);
}
+ @Override public <TInner, TResult> Enumerable<TResult>
leftMarkNestedLoopJoin(
+ Enumerable<TInner> inner,
+ NullablePredicate2<T, TInner> predicate,
+ Function2<T, @Nullable Boolean, TResult> resultSelector) {
+ return EnumerableDefaults.leftMarkNestedLoopJoin(getThis(), inner,
predicate, resultSelector);
+ }
+
@Override public <TInner, TResult> Enumerable<TResult> correlateJoin(
JoinType joinType, Function1<T, Enumerable<TInner>> inner,
Function2<T, TInner, TResult> resultSelector) {
diff --git
a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
index 96828708f7..28f4a1185e 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/EnumerableDefaults.java
@@ -1965,6 +1965,61 @@ static <TSource, TInner, TKey, TNsKey, TResult>
Enumerable<TResult> leftMarkHash
};
}
+ /**
+ * The implementation of left mark join based on nested loop.
+ *
+ * @param outer Left input
+ * @param inner Right input
+ * @param predicate Non-equi predicate that can return NULL
+ * @param resultSelector Function that concats the row of left input and
marker
+ */
+ public static <TSource, TInner, TResult> Enumerable<TResult>
leftMarkNestedLoopJoin(
+ final Enumerable<TSource> outer, final Enumerable<TInner> inner,
+ final NullablePredicate2<TSource, TInner> predicate,
+ final Function2<TSource, @Nullable Boolean, TResult> resultSelector) {
+ return new AbstractEnumerable<TResult>() {
+ @Override public Enumerator<TResult> enumerator() {
+ return new Enumerator<TResult>() {
+ Enumerator<TSource> outers = outer.enumerator();
+ @Nullable Boolean marker = false;
+
+ @Override public TResult current() {
+ return resultSelector.apply(outers.current(), marker);
+ }
+
+ @Override public boolean moveNext() {
+ if (!outers.moveNext()) {
+ return false;
+ }
+ marker = false;
+ final TSource outerRow = outers.current();
+ try (Enumerator<TInner> inners = inner.enumerator()) {
+ while (inners.moveNext()) {
+ final TInner innerRow = inners.current();
+ Boolean predicateMatched = predicate.apply(outerRow, innerRow);
+ if (predicateMatched == null) {
+ marker = null;
+ } else if (predicateMatched) {
+ marker = true;
+ break;
+ }
+ }
+ }
+ return true;
+ }
+
+ @Override public void reset() {
+ outers.reset();
+ }
+
+ @Override public void close() {
+ outers.close();
+ }
+ };
+ }
+ };
+ }
+
/**
* For each row of the {@code outer} enumerable returns the correlated rows
* from the {@code inner} enumerable.
diff --git
a/linq4j/src/main/java/org/apache/calcite/linq4j/ExtendedEnumerable.java
b/linq4j/src/main/java/org/apache/calcite/linq4j/ExtendedEnumerable.java
index 67887e7d08..982ab0ca85 100644
--- a/linq4j/src/main/java/org/apache/calcite/linq4j/ExtendedEnumerable.java
+++ b/linq4j/src/main/java/org/apache/calcite/linq4j/ExtendedEnumerable.java
@@ -686,6 +686,19 @@ <TInner, TKey, TNsKey, TResult> Enumerable<TResult>
leftMarkHashJoin(Enumerable<
NullablePredicate2<TSource, TInner> nonEquiPredicate,
NullablePredicate2<TSource, TInner> equiPredicate);
+ /**
+ * The implementation of left mark join based on nested loop.
+ *
+ * @param inner Inner enumerable
+ * @param predicate Non-equi predicate that can return NULL
+ * @param resultSelector Function that concat the row of the
current enumerable and
+ * marker
+ * @see #leftMarkHashJoin
+ */
+ <TInner, TResult> Enumerable<TResult>
leftMarkNestedLoopJoin(Enumerable<TInner> inner,
+ NullablePredicate2<TSource, TInner> predicate,
+ Function2<TSource, @Nullable Boolean, TResult> resultSelector);
+
/**
* For each row of the current enumerable returns the correlated rows
* from the {@code inner} enumerable (nested loops join).