This is an automated email from the ASF dual-hosted git repository.
mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new f6e7618998 [CALCITE-6962] Exists subquery returns incorrect result
when or condition involves null column
f6e7618998 is described below
commit f6e7618998c034d63f49ae3851513b8b39104798
Author: suibianwanwan <[email protected]>
AuthorDate: Mon May 12 01:45:04 2025 +0800
[CALCITE-6962] Exists subquery returns incorrect result when or condition
involves null column
---
.../apache/calcite/sql2rel/RelDecorrelator.java | 74 ++++++++++++++++++-
.../org/apache/calcite/test/RelOptRulesTest.xml | 4 +-
.../apache/calcite/test/SqlToRelConverterTest.xml | 4 +-
core/src/test/resources/sql/sub-query.iq | 86 ++++++++++++++++++++++
4 files changed, 160 insertions(+), 8 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
b/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
index 0f0eb0cdc3..e2808718a6 100644
--- a/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
+++ b/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
@@ -25,6 +25,7 @@
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.Strong;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
@@ -1424,11 +1425,21 @@ private static boolean isWidening(RelDataType type,
RelDataType type1) {
}
final int newLeftPos =
requireNonNull(leftFrame.oldToNewOutputs.get(corDef.field));
final int newRightPos = rightOutput.getValue();
- conditions.add(
- relBuilder.equals(RexInputRef.of(newLeftPos, newLeftOutput),
- new RexInputRef(newLeftFieldCount + newRightPos,
- newRightOutput.get(newRightPos).getType())));
+ // Using `equals` instead of `IS NOT DISTINCT FROM` is an optimization
+ // for non-nullable fields. However, `IS NOT DISTINCT FROM` is always
+ // the correct choice in all cases.
+ if (isFieldNotNull(rightFrame.r, newRightPos)) {
+ conditions.add(
+ relBuilder.equals(RexInputRef.of(newLeftPos, newLeftOutput),
+ new RexInputRef(newLeftFieldCount + newRightPos,
+ newRightOutput.get(newRightPos).getType())));
+ } else {
+ conditions.add(
+ relBuilder.isNotDistinctFrom(RexInputRef.of(newLeftPos,
newLeftOutput),
+ new RexInputRef(newLeftFieldCount + newRightPos,
+ newRightOutput.get(newRightPos).getType())));
+ }
// remove this corVar from output position mapping
corDefOutputs.remove(corDef);
}
@@ -3201,6 +3212,61 @@ assert allLessThan(this.oldToNewOutputs.values(),
}
}
+ /**
+ * Check if the field at the given index is non-nullable.
+ *
+ * <p>This method performs a basic check for `null` values in the field.
However, a
+ * `false` result does not necessarily mean that the field contains `null`
values.
+ * It only guarantees that if the result is `true`, the field contains no
`null` values.
+ */
+ private static boolean isFieldNotNull(RelNode rel, int index) {
+ RelDataType type = rel.getRowType().getFieldList().get(index).getType();
+ return !type.isNullable() || isFieldNotNullRecursive(rel, index);
+ }
+
+ private static boolean isFieldNotNullRecursive(RelNode rel, int index) {
+ if (rel instanceof Project) {
+ Project project = (Project) rel;
+
+ RexNode expr = project.getProjects().get(index);
+ if (!(expr instanceof RexInputRef)) {
+ return false;
+ }
+ return isFieldNotNullRecursive(project.getInput(), ((RexInputRef)
expr).getIndex());
+ } else if (rel instanceof Aggregate) {
+ Aggregate agg = (Aggregate) rel;
+ ImmutableBitSet groupSet = agg.getGroupSet();
+
+ if (index >= groupSet.size()) {
+ return false;
+ }
+ return isFieldNotNullRecursive(agg.getInput(),
groupSet.asList().get(index));
+ } else if (rel instanceof Filter) {
+ Filter filter = (Filter) rel;
+ if (Strong.isNotTrue(filter.getCondition(), ImmutableBitSet.of(index))) {
+ return true;
+ }
+ return isFieldNotNullRecursive(filter.getInput(), index);
+ } else if (rel instanceof Join) {
+ Join join = (Join) rel;
+ int leftFieldCnt = join.getLeft().getRowType().getFieldCount();
+ if (index < join.getLeft().getRowType().getFieldCount()) {
+ if (!join.getJoinType().generatesNullsOnLeft()) {
+ return Strong.isNotTrue(join.getCondition(),
ImmutableBitSet.of(index))
+ || isFieldNotNullRecursive(join.getLeft(), index);
+ }
+ } else {
+ if (!join.getJoinType().generatesNullsOnRight()) {
+ return Strong.isNotTrue(join.getCondition(),
ImmutableBitSet.of(index))
+ || isFieldNotNullRecursive(join.getRight(), index -
leftFieldCnt);
+ }
+ }
+ return false;
+ } else {
+ return false;
+ }
+ }
+
// -------------------------------------------------------------------------
// Getter/Setter
// -------------------------------------------------------------------------
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 2ab97cd100..18afbd7e2c 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -1931,7 +1931,7 @@ LogicalProject(NAME=[$1], EXPR$1=[+($0, $2)])
<Resource name="planAfter">
<![CDATA[
LogicalProject(NAME=[$1], EXPR$1=[+($0, $3)])
- LogicalJoin(condition=[=($0, $2)], joinType=[left])
+ LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $2)], joinType=[left])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
LogicalProject(DEPTNO00=[$11], EMPNO=[$0])
@@ -7601,7 +7601,7 @@ where n1.SAL IN (
<Resource name="planBefore">
<![CDATA[
LogicalProject(SAL=[$5])
- LogicalJoin(condition=[AND(=($5, $11), =($9, $12))], joinType=[inner])
+ LogicalJoin(condition=[AND(=($5, $11), IS NOT DISTINCT FROM($9, $12))],
joinType=[inner])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], SLACKER=[$8], SAL0=[$5], $f9=[=($5, 4)])
LogicalFilter(condition=[AND(=($7, 20), >($5, 1000))])
LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
diff --git
a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
index 316fb51ba5..2dcbe1f413 100644
--- a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
@@ -5178,7 +5178,7 @@ from (select 2+deptno d2, 3+deptno d3 from emp) e
<![CDATA[
LogicalProject(D2=[$0], D3=[$1])
LogicalProject(D2=[$0], D3=[$1], D1=[CAST($2):INTEGER], D6=[$3],
$f2=[CAST($4):BOOLEAN])
- LogicalJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[inner])
+ LogicalJoin(condition=[AND(=($0, $2), IS NOT DISTINCT FROM($1, $3))],
joinType=[inner])
LogicalProject(D2=[+(2, $7)], D3=[+(3, $7)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalAggregate(group=[{0, 1}], agg#0=[MIN($2)])
@@ -5208,7 +5208,7 @@ from (select 2+deptno d2, 3+deptno d3 from emp) e
<![CDATA[
LogicalProject(D2=[$0], D3=[$1])
LogicalProject(D2=[$0], D3=[$1], D1=[CAST($2):INTEGER], D6=[$3],
$f2=[CAST($4):BOOLEAN])
- LogicalJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[inner])
+ LogicalJoin(condition=[AND(=($0, $2), IS NOT DISTINCT FROM($1, $3))],
joinType=[inner])
LogicalProject(D2=[+(2, $7)], D3=[+(3, $7)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalAggregate(group=[{0, 1}], agg#0=[MIN($2)])
diff --git a/core/src/test/resources/sql/sub-query.iq
b/core/src/test/resources/sql/sub-query.iq
index 392463ab3f..b2217579d1 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -4178,6 +4178,92 @@ where "scott".emp.deptno in (select "scott".dept.deptno
!ok
+select *
+from "scott".emp as e
+where exists (
+ select empno
+ from "scott".emp as ee
+ where e.empno = ee.empno or e.comm >= ee.sal
+);
++-------+--------+-----------+------+------------+---------+---------+--------+
+| EMPNO | ENAME | JOB | MGR | HIREDATE | SAL | COMM | DEPTNO |
++-------+--------+-----------+------+------------+---------+---------+--------+
+| 7369 | SMITH | CLERK | 7902 | 1980-12-17 | 800.00 | | 20 |
+| 7499 | ALLEN | SALESMAN | 7698 | 1981-02-20 | 1600.00 | 300.00 | 30 |
+| 7521 | WARD | SALESMAN | 7698 | 1981-02-22 | 1250.00 | 500.00 | 30 |
+| 7566 | JONES | MANAGER | 7839 | 1981-02-04 | 2975.00 | | 20 |
+| 7654 | MARTIN | SALESMAN | 7698 | 1981-09-28 | 1250.00 | 1400.00 | 30 |
+| 7698 | BLAKE | MANAGER | 7839 | 1981-01-05 | 2850.00 | | 30 |
+| 7782 | CLARK | MANAGER | 7839 | 1981-06-09 | 2450.00 | | 10 |
+| 7788 | SCOTT | ANALYST | 7566 | 1987-04-19 | 3000.00 | | 20 |
+| 7839 | KING | PRESIDENT | | 1981-11-17 | 5000.00 | | 10 |
+| 7844 | TURNER | SALESMAN | 7698 | 1981-09-08 | 1500.00 | 0.00 | 30 |
+| 7876 | ADAMS | CLERK | 7788 | 1987-05-23 | 1100.00 | | 20 |
+| 7900 | JAMES | CLERK | 7698 | 1981-12-03 | 950.00 | | 30 |
+| 7902 | FORD | ANALYST | 7566 | 1981-12-03 | 3000.00 | | 20 |
+| 7934 | MILLER | CLERK | 7782 | 1982-01-23 | 1300.00 | | 10 |
++-------+--------+-----------+------+------------+---------+---------+--------+
+(14 rows)
+
+!ok
+
+
+SELECT
+ e1.COMM,
+ EXISTS (
+ SELECT 1
+ FROM EMP e2
+ WHERE e2.COMM IS NULL OR e2.COMM > e1.COMM * 10
+ ) AS exists_flag
+FROM EMP e1;
++---------+-------------+
+| COMM | EXISTS_FLAG |
++---------+-------------+
+| 0.00 | true |
+| 1400.00 | true |
+| 300.00 | true |
+| 500.00 | true |
+| | true |
+| | true |
+| | true |
+| | true |
+| | true |
+| | true |
+| | true |
+| | true |
+| | true |
+| | true |
++---------+-------------+
+(14 rows)
+
+!ok
+
+SELECT
+ i1.COMM,
+ i1.COMM = ANY(SELECT COMM FROM EMP WHERE COMM=i1.COMM)
+FROM EMP i1 ORDER BY COMM;
++---------+--------+
+| COMM | EXPR$1 |
++---------+--------+
+| 0.00 | true |
+| 300.00 | true |
+| 500.00 | true |
+| 1400.00 | true |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
+| | |
++---------+--------+
+(14 rows)
+
+!ok
+
# Test case for [CALCITE-5789]
select deptno from dept d1 where exists (
select 1 from dept d2 where d2.deptno = d1.deptno and exists (