This is an automated email from the ASF dual-hosted git repository.
silun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 4c041cb95f [CALCITE-7414] Incorrect mapping of CorDef after
decorrelating a Join in TopDownGeneralDecorrelator
4c041cb95f is described below
commit 4c041cb95fd84029a5b292fab0d49eaa02c17f97
Author: Silun Dong <[email protected]>
AuthorDate: Wed Feb 11 22:18:50 2026 +0800
[CALCITE-7414] Incorrect mapping of CorDef after decorrelating a Join in
TopDownGeneralDecorrelator
---
.../sql2rel/TopDownGeneralDecorrelator.java | 110 +++++++++++++++------
core/src/test/resources/sql/new-decorr.iq | 101 +++++++++++++++++++
2 files changed, 179 insertions(+), 32 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/sql2rel/TopDownGeneralDecorrelator.java
b/core/src/main/java/org/apache/calcite/sql2rel/TopDownGeneralDecorrelator.java
index 50b2006154..8246579946 100644
---
a/core/src/main/java/org/apache/calcite/sql2rel/TopDownGeneralDecorrelator.java
+++
b/core/src/main/java/org/apache/calcite/sql2rel/TopDownGeneralDecorrelator.java
@@ -53,7 +53,6 @@
import org.apache.calcite.sql2rel.RelDecorrelator.Frame;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.ImmutableBitSet;
-import org.apache.calcite.util.Litmus;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.ReflectUtil;
import org.apache.calcite.util.ReflectiveVisitor;
@@ -317,14 +316,16 @@ private RelNode correlateElimination(RelNode rel, boolean
allowEmptyOutputFromRe
if (!hasParent) {
// ensure that the fields are in the same order as in the original plan.
- builder.push(unnestedRel);
UnnestedQuery unnestedQuery =
- UnnestedQuery.createJoinUnnestInfo(
+ UnnestedQuery.createJoinUnnestedQuery(
leftInfo,
rightInfo,
correlate,
unnestedRel,
- correlate.getJoinType());
+ correlate.getJoinType(),
+ builder,
+ corDefs);
+ builder.push(unnestedQuery.r);
List<RexNode> projects
= builder.fields(new
ArrayList<>(unnestedQuery.oldToNewOutputs.values()));
unnestedRel = builder.project(projects).build();
@@ -669,10 +670,10 @@ public RelNode unnestInternal(Correlate correlate,
boolean allowEmptyOutputFromR
UnnestedQuery rightInfo
=
requireNonNull(subDecorrelator.mapRelToUnnestedQuery.get(correlate.getRight()));
UnnestedQuery unnestedQuery =
- UnnestedQuery.createJoinUnnestInfo(leftInfo, rightInfo, correlate,
- newJoin, correlate.getJoinType());
+ UnnestedQuery.createJoinUnnestedQuery(leftInfo, rightInfo, correlate,
+ newJoin, correlate.getJoinType(), builder, corDefs);
mapRelToUnnestedQuery.put(correlate, unnestedQuery);
- return newJoin;
+ return unnestedQuery.r;
}
public RelNode unnestInternal(Join join, boolean
allowEmptyOutputFromRewrite) {
@@ -745,14 +746,16 @@ public RelNode unnestInternal(Join join, boolean
allowEmptyOutputFromRewrite) {
corDefs);
RelNode newJoin = builder.join(join.getJoinType(),
newJoinCondition).build();
UnnestedQuery unnestedQuery =
- UnnestedQuery.createJoinUnnestInfo(
+ UnnestedQuery.createJoinUnnestedQuery(
leftInfo,
rightInfo,
join,
newJoin,
- join.getJoinType());
+ join.getJoinType(),
+ builder,
+ corDefs);
mapRelToUnnestedQuery.put(join, unnestedQuery);
- return newJoin;
+ return unnestedQuery.r;
}
public RelNode unnestInternal(SetOp setOp, boolean
allowEmptyOutputFromRewrite) {
@@ -989,23 +992,27 @@ static class UnnestedQuery extends Frame {
/**
* Create UnnestedQuery for Join/Correlate after decorrelating.
*
- * @param leftInfo UnnestedQuery of the left side
- * @param rightInfo UnnestedQuery of the right side
- * @param oriJoinNode original Join/Correlate node
- * @param unnestedJoinNode new node after decorrelating
- * @param joinRelType join type of original Join/Correlate
+ * @param leftUnnestedQuery UnnestedQuery of the left side
+ * @param rightUnnestedQuery UnnestedQuery of the right side
+ * @param oriJoinNode original Join/Correlate node
+ * @param unnestedJoinNode new node after decorrelating
+ * @param joinRelType join type of original Join/Correlate
+ * @param builder RelBuilder
+ * @param corDefs the CorDef in the current decorrelator
context
* @return UnnestedQuery
*/
- private static UnnestedQuery createJoinUnnestInfo(
- UnnestedQuery leftInfo,
- UnnestedQuery rightInfo,
+ private static UnnestedQuery createJoinUnnestedQuery(
+ UnnestedQuery leftUnnestedQuery,
+ UnnestedQuery rightUnnestedQuery,
RelNode oriJoinNode,
RelNode unnestedJoinNode,
- JoinRelType joinRelType) {
+ JoinRelType joinRelType,
+ RelBuilder builder,
+ NavigableSet<CorDef> corDefs) {
Map<Integer, Integer> oldToNewOutputs = new HashMap<>();
- oldToNewOutputs.putAll(leftInfo.oldToNewOutputs);
- int oriLeftFieldCount = leftInfo.oldRel.getRowType().getFieldCount();
- int newLeftFieldCount = leftInfo.r.getRowType().getFieldCount();
+ oldToNewOutputs.putAll(leftUnnestedQuery.oldToNewOutputs);
+ int oriLeftFieldCount =
leftUnnestedQuery.oldRel.getRowType().getFieldCount();
+ int newLeftFieldCount = leftUnnestedQuery.r.getRowType().getFieldCount();
switch (joinRelType) {
case SEMI:
case ANTI:
@@ -1014,24 +1021,63 @@ private static UnnestedQuery createJoinUnnestInfo(
oldToNewOutputs.put(oriLeftFieldCount, newLeftFieldCount);
break;
default:
- rightInfo.oldToNewOutputs.forEach((oriIndex, newIndex) ->
+ rightUnnestedQuery.oldToNewOutputs.forEach((oriIndex, newIndex) ->
oldToNewOutputs.put(
requireNonNull(oriIndex, "oriIndex") + oriLeftFieldCount,
requireNonNull(newIndex, "newIndex") + newLeftFieldCount));
break;
}
+ // we have to take the join type into account to decide which side of
the join to use for
+ // mapping CorDef to output index. See section 3.3 in paper Improving
Unnesting of Complex
+ // Queries
TreeMap<CorDef, Integer> corDefOutputs = new TreeMap<>();
- if (!leftInfo.corDefOutputs.isEmpty()) {
- corDefOutputs.putAll(leftInfo.corDefOutputs);
- } else if (!rightInfo.corDefOutputs.isEmpty()) {
- Litmus.THROW.check(joinRelType.projectsRight(),
- "If the joinType doesn't project right, its left side must have
UnnestInfo.");
- rightInfo.corDefOutputs.forEach((corDef, index) ->
+ switch (joinRelType) {
+ case SEMI:
+ case ANTI:
+ case LEFT_MARK:
+ case LEFT:
+ // if output only includes the left, or the unmatched rows from the
left,
+ // we use the left for mapping.
+ corDefOutputs.putAll(leftUnnestedQuery.corDefOutputs);
+ break;
+ case RIGHT:
+ // if the unmatched rows from the right, we use the right for mapping.
+ rightUnnestedQuery.corDefOutputs.forEach((corDef, index) ->
corDefOutputs.put(corDef, index + newLeftFieldCount));
- } else {
- throw new IllegalArgumentException("The UnnestInfo for both sides of
Join/Correlate that "
- + "has correlation should not all be empty.");
+ break;
+ case FULL:
+ // when full outer join, we must use COALESCE(left_cor_index,
right_cor_index) to map the
+ // CorDef, so we need to add a Project on top of the Join.
+ builder.push(unnestedJoinNode);
+ List<RexNode> projects = new ArrayList<>(builder.fields());
+ for (CorDef corDef : corDefs) {
+ int leftIndex =
requireNonNull(leftUnnestedQuery.corDefOutputs.get(corDef));
+ int rightIndex =
requireNonNull(rightUnnestedQuery.corDefOutputs.get(corDef));
+ RexNode coalesce =
+ builder.call(
+ SqlStdOperatorTable.COALESCE,
+ builder.field(leftIndex),
+ builder.field(rightIndex + newLeftFieldCount));
+ projects.add(coalesce);
+ corDefOutputs.put(corDef, projects.size() - 1);
+ }
+ unnestedJoinNode = builder.project(projects).build();
+ break;
+ case INNER:
+ // when inner join, we can use either side that contains D for mapping.
+ if (!leftUnnestedQuery.corDefOutputs.isEmpty()) {
+ corDefOutputs.putAll(leftUnnestedQuery.corDefOutputs);
+ } else if (!rightUnnestedQuery.corDefOutputs.isEmpty()) {
+ rightUnnestedQuery.corDefOutputs.forEach((corDef, index) ->
+ corDefOutputs.put(corDef, index + newLeftFieldCount));
+ } else {
+ throw new IllegalArgumentException("The UnnestInfo for both sides of
Join/Correlate that "
+ + "has correlation should not all be empty.");
+ }
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported join type : " +
joinRelType);
}
return new UnnestedQuery(oriJoinNode, unnestedJoinNode, corDefOutputs,
oldToNewOutputs);
}
diff --git a/core/src/test/resources/sql/new-decorr.iq
b/core/src/test/resources/sql/new-decorr.iq
index 4c2e5791af..73ed32f386 100644
--- a/core/src/test/resources/sql/new-decorr.iq
+++ b/core/src/test/resources/sql/new-decorr.iq
@@ -300,4 +300,105 @@ FROM dept;
!ok
+# [CALCITE-7414] Incorrect mapping of CorDef after decorrelating a Join in
TopDownGeneralDecorrelator
+# These cases come from sub-query.iq [CALCITE-7379]
+SELECT e.ename, e.job, e.sal
+FROM emp e
+WHERE EXISTS (
+ SELECT 1
+ FROM (
+ SELECT * FROM bonus b WHERE b.ename = e.ename
+ ) foo
+ LEFT JOIN dept d
+ ON foo.job = d.loc
+);
++-------+-----+-----+
+| ENAME | JOB | SAL |
++-------+-----+-----+
++-------+-----+-----+
+(0 rows)
+
+!ok
+
+SELECT e.ename, e.job, e.sal
+FROM emp e
+WHERE EXISTS (
+ SELECT 1
+ FROM (
+ SELECT * FROM bonus b WHERE b.ename = e.ename
+ ) foo
+ RIGHT JOIN dept d
+ ON foo.job = d.loc
+);
++--------+-----------+---------+
+| ENAME | JOB | SAL |
++--------+-----------+---------+
+| ADAMS | CLERK | 1100.00 |
+| ALLEN | SALESMAN | 1600.00 |
+| BLAKE | MANAGER | 2850.00 |
+| CLARK | MANAGER | 2450.00 |
+| FORD | ANALYST | 3000.00 |
+| JAMES | CLERK | 950.00 |
+| JONES | MANAGER | 2975.00 |
+| KING | PRESIDENT | 5000.00 |
+| MARTIN | SALESMAN | 1250.00 |
+| MILLER | CLERK | 1300.00 |
+| SCOTT | ANALYST | 3000.00 |
+| SMITH | CLERK | 800.00 |
+| TURNER | SALESMAN | 1500.00 |
+| WARD | SALESMAN | 1250.00 |
++--------+-----------+---------+
+(14 rows)
+
+!ok
+
+SELECT e.ename, e.job, e.sal
+FROM emp e
+WHERE EXISTS (
+ SELECT 1
+ FROM (
+ SELECT * FROM bonus b WHERE b.ename = e.ename
+ ) foo
+ FULL JOIN dept d
+ ON foo.job = d.loc
+);
++--------+-----------+---------+
+| ENAME | JOB | SAL |
++--------+-----------+---------+
+| ADAMS | CLERK | 1100.00 |
+| ALLEN | SALESMAN | 1600.00 |
+| BLAKE | MANAGER | 2850.00 |
+| CLARK | MANAGER | 2450.00 |
+| FORD | ANALYST | 3000.00 |
+| JAMES | CLERK | 950.00 |
+| JONES | MANAGER | 2975.00 |
+| KING | PRESIDENT | 5000.00 |
+| MARTIN | SALESMAN | 1250.00 |
+| MILLER | CLERK | 1300.00 |
+| SCOTT | ANALYST | 3000.00 |
+| SMITH | CLERK | 800.00 |
+| TURNER | SALESMAN | 1500.00 |
+| WARD | SALESMAN | 1250.00 |
++--------+-----------+---------+
+(14 rows)
+
+!ok
+
+!if (use_new_decorr) {
+EnumerableCalc(expr#0..3=[{inputs}], ENAME=[$t1], JOB=[$t2], SAL=[$t3])
+ EnumerableHashJoin(condition=[IS NOT DISTINCT FROM($1, $4)], joinType=[semi])
+ EnumerableCalc(expr#0..7=[{inputs}], proj#0..2=[{exprs}], SAL=[$t5])
+ EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableCalc(expr#0..5=[{inputs}], expr#6=[COALESCE($t2, $t5)],
$f10=[$t6])
+ EnumerableHashJoin(condition=[AND(=($1, $4), IS NOT DISTINCT FROM($2,
$5))], joinType=[full])
+ EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t1):VARCHAR(13)],
expr#5=[IS NOT NULL($t0)], ENAME=[$t0], JOB0=[$t4], ENAME0=[$t0],
$condition=[$t5])
+ EnumerableTableScan(table=[[scott, BONUS]])
+ EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
+ EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0], LOC=[$t2])
+ EnumerableTableScan(table=[[scott, DEPT]])
+ EnumerableCalc(expr#0..7=[{inputs}], ENAME=[$t1])
+ EnumerableTableScan(table=[[scott, EMP]])
+!plan
+!}
+
# End new-decorr.iq