This is an automated email from the ASF dual-hosted git repository.

silun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 4c041cb95f [CALCITE-7414] Incorrect mapping of CorDef after 
decorrelating a Join in TopDownGeneralDecorrelator
4c041cb95f is described below

commit 4c041cb95fd84029a5b292fab0d49eaa02c17f97
Author: Silun Dong <[email protected]>
AuthorDate: Wed Feb 11 22:18:50 2026 +0800

    [CALCITE-7414] Incorrect mapping of CorDef after decorrelating a Join in 
TopDownGeneralDecorrelator
---
 .../sql2rel/TopDownGeneralDecorrelator.java        | 110 +++++++++++++++------
 core/src/test/resources/sql/new-decorr.iq          | 101 +++++++++++++++++++
 2 files changed, 179 insertions(+), 32 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/sql2rel/TopDownGeneralDecorrelator.java 
b/core/src/main/java/org/apache/calcite/sql2rel/TopDownGeneralDecorrelator.java
index 50b2006154..8246579946 100644
--- 
a/core/src/main/java/org/apache/calcite/sql2rel/TopDownGeneralDecorrelator.java
+++ 
b/core/src/main/java/org/apache/calcite/sql2rel/TopDownGeneralDecorrelator.java
@@ -53,7 +53,6 @@
 import org.apache.calcite.sql2rel.RelDecorrelator.Frame;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.calcite.util.Litmus;
 import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.ReflectUtil;
 import org.apache.calcite.util.ReflectiveVisitor;
@@ -317,14 +316,16 @@ private RelNode correlateElimination(RelNode rel, boolean 
allowEmptyOutputFromRe
 
     if (!hasParent) {
       // ensure that the fields are in the same order as in the original plan.
-      builder.push(unnestedRel);
       UnnestedQuery unnestedQuery =
-          UnnestedQuery.createJoinUnnestInfo(
+          UnnestedQuery.createJoinUnnestedQuery(
               leftInfo,
               rightInfo,
               correlate,
               unnestedRel,
-              correlate.getJoinType());
+              correlate.getJoinType(),
+              builder,
+              corDefs);
+      builder.push(unnestedQuery.r);
       List<RexNode> projects
           = builder.fields(new 
ArrayList<>(unnestedQuery.oldToNewOutputs.values()));
       unnestedRel = builder.project(projects).build();
@@ -669,10 +670,10 @@ public RelNode unnestInternal(Correlate correlate, 
boolean allowEmptyOutputFromR
     UnnestedQuery rightInfo
         = 
requireNonNull(subDecorrelator.mapRelToUnnestedQuery.get(correlate.getRight()));
     UnnestedQuery unnestedQuery =
-        UnnestedQuery.createJoinUnnestInfo(leftInfo, rightInfo, correlate,
-            newJoin, correlate.getJoinType());
+        UnnestedQuery.createJoinUnnestedQuery(leftInfo, rightInfo, correlate,
+            newJoin, correlate.getJoinType(), builder, corDefs);
     mapRelToUnnestedQuery.put(correlate, unnestedQuery);
-    return newJoin;
+    return unnestedQuery.r;
   }
 
   public RelNode unnestInternal(Join join, boolean 
allowEmptyOutputFromRewrite) {
@@ -745,14 +746,16 @@ public RelNode unnestInternal(Join join, boolean 
allowEmptyOutputFromRewrite) {
             corDefs);
     RelNode newJoin = builder.join(join.getJoinType(), 
newJoinCondition).build();
     UnnestedQuery unnestedQuery =
-        UnnestedQuery.createJoinUnnestInfo(
+        UnnestedQuery.createJoinUnnestedQuery(
             leftInfo,
             rightInfo,
             join,
             newJoin,
-            join.getJoinType());
+            join.getJoinType(),
+            builder,
+            corDefs);
     mapRelToUnnestedQuery.put(join, unnestedQuery);
-    return newJoin;
+    return unnestedQuery.r;
   }
 
   public RelNode unnestInternal(SetOp setOp, boolean 
allowEmptyOutputFromRewrite) {
@@ -989,23 +992,27 @@ static class UnnestedQuery extends Frame {
     /**
      * Create UnnestedQuery for Join/Correlate after decorrelating.
      *
-     * @param leftInfo          UnnestedQuery of the left side
-     * @param rightInfo         UnnestedQuery of the right side
-     * @param oriJoinNode       original Join/Correlate node
-     * @param unnestedJoinNode  new node after decorrelating
-     * @param joinRelType       join type of original Join/Correlate
+     * @param leftUnnestedQuery   UnnestedQuery of the left side
+     * @param rightUnnestedQuery  UnnestedQuery of the right side
+     * @param oriJoinNode         original Join/Correlate node
+     * @param unnestedJoinNode    new node after decorrelating
+     * @param joinRelType         join type of original Join/Correlate
+     * @param builder             RelBuilder
+     * @param corDefs             the CorDef in the current decorrelator 
context
      * @return UnnestedQuery
      */
-    private static UnnestedQuery createJoinUnnestInfo(
-        UnnestedQuery leftInfo,
-        UnnestedQuery rightInfo,
+    private static UnnestedQuery createJoinUnnestedQuery(
+        UnnestedQuery leftUnnestedQuery,
+        UnnestedQuery rightUnnestedQuery,
         RelNode oriJoinNode,
         RelNode unnestedJoinNode,
-        JoinRelType joinRelType) {
+        JoinRelType joinRelType,
+        RelBuilder builder,
+        NavigableSet<CorDef> corDefs) {
       Map<Integer, Integer> oldToNewOutputs = new HashMap<>();
-      oldToNewOutputs.putAll(leftInfo.oldToNewOutputs);
-      int oriLeftFieldCount = leftInfo.oldRel.getRowType().getFieldCount();
-      int newLeftFieldCount = leftInfo.r.getRowType().getFieldCount();
+      oldToNewOutputs.putAll(leftUnnestedQuery.oldToNewOutputs);
+      int oriLeftFieldCount = 
leftUnnestedQuery.oldRel.getRowType().getFieldCount();
+      int newLeftFieldCount = leftUnnestedQuery.r.getRowType().getFieldCount();
       switch (joinRelType) {
       case SEMI:
       case ANTI:
@@ -1014,24 +1021,63 @@ private static UnnestedQuery createJoinUnnestInfo(
         oldToNewOutputs.put(oriLeftFieldCount, newLeftFieldCount);
         break;
       default:
-        rightInfo.oldToNewOutputs.forEach((oriIndex, newIndex) ->
+        rightUnnestedQuery.oldToNewOutputs.forEach((oriIndex, newIndex) ->
             oldToNewOutputs.put(
                 requireNonNull(oriIndex, "oriIndex") + oriLeftFieldCount,
                 requireNonNull(newIndex, "newIndex") + newLeftFieldCount));
         break;
       }
 
+      // we have to take the join type into account to decide which side of 
the join to use for
+      // mapping CorDef to output index. See section 3.3 in paper Improving 
Unnesting of Complex
+      // Queries
       TreeMap<CorDef, Integer> corDefOutputs = new TreeMap<>();
-      if (!leftInfo.corDefOutputs.isEmpty()) {
-        corDefOutputs.putAll(leftInfo.corDefOutputs);
-      } else if (!rightInfo.corDefOutputs.isEmpty()) {
-        Litmus.THROW.check(joinRelType.projectsRight(),
-            "If the joinType doesn't project right, its left side must have 
UnnestInfo.");
-        rightInfo.corDefOutputs.forEach((corDef, index) ->
+      switch (joinRelType) {
+      case SEMI:
+      case ANTI:
+      case LEFT_MARK:
+      case LEFT:
+        // if output only includes the left, or the unmatched rows from the 
left,
+        // we use the left for mapping.
+        corDefOutputs.putAll(leftUnnestedQuery.corDefOutputs);
+        break;
+      case RIGHT:
+        // if the unmatched rows from the right, we use the right for mapping.
+        rightUnnestedQuery.corDefOutputs.forEach((corDef, index) ->
             corDefOutputs.put(corDef, index + newLeftFieldCount));
-      } else {
-        throw new IllegalArgumentException("The UnnestInfo for both sides of 
Join/Correlate that "
-            + "has correlation should not all be empty.");
+        break;
+      case FULL:
+        // when full outer join, we must use COALESCE(left_cor_index, 
right_cor_index) to map the
+        // CorDef, so we need to add a Project on top of the Join.
+        builder.push(unnestedJoinNode);
+        List<RexNode> projects = new ArrayList<>(builder.fields());
+        for (CorDef corDef : corDefs) {
+          int leftIndex = 
requireNonNull(leftUnnestedQuery.corDefOutputs.get(corDef));
+          int rightIndex = 
requireNonNull(rightUnnestedQuery.corDefOutputs.get(corDef));
+          RexNode coalesce =
+              builder.call(
+                  SqlStdOperatorTable.COALESCE,
+                  builder.field(leftIndex),
+                  builder.field(rightIndex + newLeftFieldCount));
+          projects.add(coalesce);
+          corDefOutputs.put(corDef, projects.size() - 1);
+        }
+        unnestedJoinNode = builder.project(projects).build();
+        break;
+      case INNER:
+        // when inner join, we can use either side that contains D for mapping.
+        if (!leftUnnestedQuery.corDefOutputs.isEmpty()) {
+          corDefOutputs.putAll(leftUnnestedQuery.corDefOutputs);
+        } else if (!rightUnnestedQuery.corDefOutputs.isEmpty()) {
+          rightUnnestedQuery.corDefOutputs.forEach((corDef, index) ->
+              corDefOutputs.put(corDef, index + newLeftFieldCount));
+        } else {
+          throw new IllegalArgumentException("The UnnestInfo for both sides of 
Join/Correlate that "
+              + "has correlation should not all be empty.");
+        }
+        break;
+      default:
+        throw new UnsupportedOperationException("Unsupported join type : " + 
joinRelType);
       }
       return new UnnestedQuery(oriJoinNode, unnestedJoinNode, corDefOutputs, 
oldToNewOutputs);
     }
diff --git a/core/src/test/resources/sql/new-decorr.iq 
b/core/src/test/resources/sql/new-decorr.iq
index 4c2e5791af..73ed32f386 100644
--- a/core/src/test/resources/sql/new-decorr.iq
+++ b/core/src/test/resources/sql/new-decorr.iq
@@ -300,4 +300,105 @@ FROM dept;
 
 !ok
 
+# [CALCITE-7414] Incorrect mapping of CorDef after decorrelating a Join in 
TopDownGeneralDecorrelator
+# These cases come from sub-query.iq [CALCITE-7379]
+SELECT e.ename, e.job, e.sal
+FROM emp e
+WHERE EXISTS (
+    SELECT 1
+    FROM (
+        SELECT * FROM bonus b WHERE b.ename = e.ename
+    ) foo
+    LEFT JOIN dept d
+    ON foo.job = d.loc
+);
++-------+-----+-----+
+| ENAME | JOB | SAL |
++-------+-----+-----+
++-------+-----+-----+
+(0 rows)
+
+!ok
+
+SELECT e.ename, e.job, e.sal
+FROM emp e
+WHERE EXISTS (
+    SELECT 1
+    FROM (
+        SELECT * FROM bonus b WHERE b.ename = e.ename
+    ) foo
+    RIGHT JOIN dept d
+    ON foo.job = d.loc
+);
++--------+-----------+---------+
+| ENAME  | JOB       | SAL     |
++--------+-----------+---------+
+| ADAMS  | CLERK     | 1100.00 |
+| ALLEN  | SALESMAN  | 1600.00 |
+| BLAKE  | MANAGER   | 2850.00 |
+| CLARK  | MANAGER   | 2450.00 |
+| FORD   | ANALYST   | 3000.00 |
+| JAMES  | CLERK     |  950.00 |
+| JONES  | MANAGER   | 2975.00 |
+| KING   | PRESIDENT | 5000.00 |
+| MARTIN | SALESMAN  | 1250.00 |
+| MILLER | CLERK     | 1300.00 |
+| SCOTT  | ANALYST   | 3000.00 |
+| SMITH  | CLERK     |  800.00 |
+| TURNER | SALESMAN  | 1500.00 |
+| WARD   | SALESMAN  | 1250.00 |
++--------+-----------+---------+
+(14 rows)
+
+!ok
+
+SELECT e.ename, e.job, e.sal
+FROM emp e
+WHERE EXISTS (
+    SELECT 1
+    FROM (
+        SELECT * FROM bonus b WHERE b.ename = e.ename
+    ) foo
+    FULL JOIN dept d
+    ON foo.job = d.loc
+);
++--------+-----------+---------+
+| ENAME  | JOB       | SAL     |
++--------+-----------+---------+
+| ADAMS  | CLERK     | 1100.00 |
+| ALLEN  | SALESMAN  | 1600.00 |
+| BLAKE  | MANAGER   | 2850.00 |
+| CLARK  | MANAGER   | 2450.00 |
+| FORD   | ANALYST   | 3000.00 |
+| JAMES  | CLERK     |  950.00 |
+| JONES  | MANAGER   | 2975.00 |
+| KING   | PRESIDENT | 5000.00 |
+| MARTIN | SALESMAN  | 1250.00 |
+| MILLER | CLERK     | 1300.00 |
+| SCOTT  | ANALYST   | 3000.00 |
+| SMITH  | CLERK     |  800.00 |
+| TURNER | SALESMAN  | 1500.00 |
+| WARD   | SALESMAN  | 1250.00 |
++--------+-----------+---------+
+(14 rows)
+
+!ok
+
+!if (use_new_decorr) {
+EnumerableCalc(expr#0..3=[{inputs}], ENAME=[$t1], JOB=[$t2], SAL=[$t3])
+  EnumerableHashJoin(condition=[IS NOT DISTINCT FROM($1, $4)], joinType=[semi])
+    EnumerableCalc(expr#0..7=[{inputs}], proj#0..2=[{exprs}], SAL=[$t5])
+      EnumerableTableScan(table=[[scott, EMP]])
+    EnumerableCalc(expr#0..5=[{inputs}], expr#6=[COALESCE($t2, $t5)], 
$f10=[$t6])
+      EnumerableHashJoin(condition=[AND(=($1, $4), IS NOT DISTINCT FROM($2, 
$5))], joinType=[full])
+        EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t1):VARCHAR(13)], 
expr#5=[IS NOT NULL($t0)], ENAME=[$t0], JOB0=[$t4], ENAME0=[$t0], 
$condition=[$t5])
+          EnumerableTableScan(table=[[scott, BONUS]])
+        EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
+          EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0], LOC=[$t2])
+            EnumerableTableScan(table=[[scott, DEPT]])
+          EnumerableCalc(expr#0..7=[{inputs}], ENAME=[$t1])
+            EnumerableTableScan(table=[[scott, EMP]])
+!plan
+!}
+
 # End new-decorr.iq

Reply via email to