This is an automated email from the ASF dual-hosted git repository.

mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new f6e7618998 [CALCITE-6962] Exists subquery returns incorrect result 
when or condition involves null column
f6e7618998 is described below

commit f6e7618998c034d63f49ae3851513b8b39104798
Author: suibianwanwan <[email protected]>
AuthorDate: Mon May 12 01:45:04 2025 +0800

    [CALCITE-6962] Exists subquery returns incorrect result when or condition 
involves null column
---
 .../apache/calcite/sql2rel/RelDecorrelator.java    | 74 ++++++++++++++++++-
 .../org/apache/calcite/test/RelOptRulesTest.xml    |  4 +-
 .../apache/calcite/test/SqlToRelConverterTest.xml  |  4 +-
 core/src/test/resources/sql/sub-query.iq           | 86 ++++++++++++++++++++++
 4 files changed, 160 insertions(+), 8 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java 
b/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
index 0f0eb0cdc3..e2808718a6 100644
--- a/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
+++ b/core/src/main/java/org/apache/calcite/sql2rel/RelDecorrelator.java
@@ -25,6 +25,7 @@
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.plan.RelRule;
+import org.apache.calcite.plan.Strong;
 import org.apache.calcite.plan.hep.HepPlanner;
 import org.apache.calcite.plan.hep.HepProgram;
 import org.apache.calcite.plan.hep.HepProgramBuilder;
@@ -1424,11 +1425,21 @@ private static boolean isWidening(RelDataType type, 
RelDataType type1) {
       }
       final int newLeftPos = 
requireNonNull(leftFrame.oldToNewOutputs.get(corDef.field));
       final int newRightPos = rightOutput.getValue();
-      conditions.add(
-          relBuilder.equals(RexInputRef.of(newLeftPos, newLeftOutput),
-              new RexInputRef(newLeftFieldCount + newRightPos,
-                  newRightOutput.get(newRightPos).getType())));
 
+      // Using `equals` instead of `IS NOT DISTINCT FROM` is an optimization
+      // for non-nullable fields. However, `IS NOT DISTINCT FROM` is always
+      // the correct choice in all cases.
+      if (isFieldNotNull(rightFrame.r, newRightPos)) {
+        conditions.add(
+            relBuilder.equals(RexInputRef.of(newLeftPos, newLeftOutput),
+                new RexInputRef(newLeftFieldCount + newRightPos,
+                    newRightOutput.get(newRightPos).getType())));
+      } else {
+        conditions.add(
+            relBuilder.isNotDistinctFrom(RexInputRef.of(newLeftPos, 
newLeftOutput),
+                new RexInputRef(newLeftFieldCount + newRightPos,
+                    newRightOutput.get(newRightPos).getType())));
+      }
       // remove this corVar from output position mapping
       corDefOutputs.remove(corDef);
     }
@@ -3201,6 +3212,61 @@ assert allLessThan(this.oldToNewOutputs.values(),
     }
   }
 
+  /**
+   * Check if the field at the given index is non-nullable.
+   *
+   * <p>This method performs a basic check for `null` values in the field. 
However, a
+   * `false` result does not necessarily mean that the field contains `null` 
values.
+   * It only guarantees that if the result is `true`, the field contains no 
`null` values.
+   */
+  private static boolean isFieldNotNull(RelNode rel, int index) {
+    RelDataType type = rel.getRowType().getFieldList().get(index).getType();
+    return !type.isNullable() || isFieldNotNullRecursive(rel, index);
+  }
+
+  private static boolean isFieldNotNullRecursive(RelNode rel, int index) {
+    if (rel instanceof Project) {
+      Project project = (Project) rel;
+
+      RexNode expr = project.getProjects().get(index);
+      if (!(expr instanceof RexInputRef)) {
+        return false;
+      }
+      return isFieldNotNullRecursive(project.getInput(), ((RexInputRef) 
expr).getIndex());
+    } else if (rel instanceof Aggregate) {
+      Aggregate agg = (Aggregate) rel;
+      ImmutableBitSet groupSet = agg.getGroupSet();
+
+      if (index >= groupSet.size()) {
+        return false;
+      }
+      return isFieldNotNullRecursive(agg.getInput(), 
groupSet.asList().get(index));
+    } else if (rel instanceof Filter) {
+      Filter filter = (Filter) rel;
+      if (Strong.isNotTrue(filter.getCondition(), ImmutableBitSet.of(index))) {
+        return true;
+      }
+      return isFieldNotNullRecursive(filter.getInput(), index);
+    } else if (rel instanceof Join) {
+      Join join = (Join) rel;
+      int leftFieldCnt = join.getLeft().getRowType().getFieldCount();
+      if (index < join.getLeft().getRowType().getFieldCount()) {
+        if (!join.getJoinType().generatesNullsOnLeft()) {
+          return Strong.isNotTrue(join.getCondition(), 
ImmutableBitSet.of(index))
+              || isFieldNotNullRecursive(join.getLeft(), index);
+        }
+      } else {
+        if (!join.getJoinType().generatesNullsOnRight()) {
+          return Strong.isNotTrue(join.getCondition(), 
ImmutableBitSet.of(index))
+              || isFieldNotNullRecursive(join.getRight(), index - 
leftFieldCnt);
+        }
+      }
+      return false;
+    } else {
+      return false;
+    }
+  }
+
   // -------------------------------------------------------------------------
   //  Getter/Setter
   // -------------------------------------------------------------------------
diff --git 
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml 
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 2ab97cd100..18afbd7e2c 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -1931,7 +1931,7 @@ LogicalProject(NAME=[$1], EXPR$1=[+($0, $2)])
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(NAME=[$1], EXPR$1=[+($0, $3)])
-  LogicalJoin(condition=[=($0, $2)], joinType=[left])
+  LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $2)], joinType=[left])
     LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
     LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
       LogicalProject(DEPTNO00=[$11], EMPNO=[$0])
@@ -7601,7 +7601,7 @@ where n1.SAL IN (
     <Resource name="planBefore">
       <![CDATA[
 LogicalProject(SAL=[$5])
-  LogicalJoin(condition=[AND(=($5, $11), =($9, $12))], joinType=[inner])
+  LogicalJoin(condition=[AND(=($5, $11), IS NOT DISTINCT FROM($9, $12))], 
joinType=[inner])
     LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], SLACKER=[$8], SAL0=[$5], $f9=[=($5, 4)])
       LogicalFilter(condition=[AND(=($7, 20), >($5, 1000))])
         LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
diff --git 
a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml 
b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
index 316fb51ba5..2dcbe1f413 100644
--- a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
@@ -5178,7 +5178,7 @@ from (select 2+deptno d2, 3+deptno d3 from emp) e
       <![CDATA[
 LogicalProject(D2=[$0], D3=[$1])
   LogicalProject(D2=[$0], D3=[$1], D1=[CAST($2):INTEGER], D6=[$3], 
$f2=[CAST($4):BOOLEAN])
-    LogicalJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[inner])
+    LogicalJoin(condition=[AND(=($0, $2), IS NOT DISTINCT FROM($1, $3))], 
joinType=[inner])
       LogicalProject(D2=[+(2, $7)], D3=[+(3, $7)])
         LogicalTableScan(table=[[CATALOG, SALES, EMP]])
       LogicalAggregate(group=[{0, 1}], agg#0=[MIN($2)])
@@ -5208,7 +5208,7 @@ from (select 2+deptno d2, 3+deptno d3 from emp) e
       <![CDATA[
 LogicalProject(D2=[$0], D3=[$1])
   LogicalProject(D2=[$0], D3=[$1], D1=[CAST($2):INTEGER], D6=[$3], 
$f2=[CAST($4):BOOLEAN])
-    LogicalJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[inner])
+    LogicalJoin(condition=[AND(=($0, $2), IS NOT DISTINCT FROM($1, $3))], 
joinType=[inner])
       LogicalProject(D2=[+(2, $7)], D3=[+(3, $7)])
         LogicalTableScan(table=[[CATALOG, SALES, EMP]])
       LogicalAggregate(group=[{0, 1}], agg#0=[MIN($2)])
diff --git a/core/src/test/resources/sql/sub-query.iq 
b/core/src/test/resources/sql/sub-query.iq
index 392463ab3f..b2217579d1 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -4178,6 +4178,92 @@ where "scott".emp.deptno in (select "scott".dept.deptno
 
 !ok
 
+select *
+from "scott".emp as e
+where exists (
+  select empno
+  from "scott".emp as ee
+  where e.empno = ee.empno or e.comm >= ee.sal
+);
++-------+--------+-----------+------+------------+---------+---------+--------+
+| EMPNO | ENAME  | JOB       | MGR  | HIREDATE   | SAL     | COMM    | DEPTNO |
++-------+--------+-----------+------+------------+---------+---------+--------+
+|  7369 | SMITH  | CLERK     | 7902 | 1980-12-17 |  800.00 |         |     20 |
+|  7499 | ALLEN  | SALESMAN  | 7698 | 1981-02-20 | 1600.00 |  300.00 |     30 |
+|  7521 | WARD   | SALESMAN  | 7698 | 1981-02-22 | 1250.00 |  500.00 |     30 |
+|  7566 | JONES  | MANAGER   | 7839 | 1981-02-04 | 2975.00 |         |     20 |
+|  7654 | MARTIN | SALESMAN  | 7698 | 1981-09-28 | 1250.00 | 1400.00 |     30 |
+|  7698 | BLAKE  | MANAGER   | 7839 | 1981-01-05 | 2850.00 |         |     30 |
+|  7782 | CLARK  | MANAGER   | 7839 | 1981-06-09 | 2450.00 |         |     10 |
+|  7788 | SCOTT  | ANALYST   | 7566 | 1987-04-19 | 3000.00 |         |     20 |
+|  7839 | KING   | PRESIDENT |      | 1981-11-17 | 5000.00 |         |     10 |
+|  7844 | TURNER | SALESMAN  | 7698 | 1981-09-08 | 1500.00 |    0.00 |     30 |
+|  7876 | ADAMS  | CLERK     | 7788 | 1987-05-23 | 1100.00 |         |     20 |
+|  7900 | JAMES  | CLERK     | 7698 | 1981-12-03 |  950.00 |         |     30 |
+|  7902 | FORD   | ANALYST   | 7566 | 1981-12-03 | 3000.00 |         |     20 |
+|  7934 | MILLER | CLERK     | 7782 | 1982-01-23 | 1300.00 |         |     10 |
++-------+--------+-----------+------+------------+---------+---------+--------+
+(14 rows)
+
+!ok
+
+
+SELECT
+  e1.COMM,
+  EXISTS (
+    SELECT 1
+    FROM EMP e2
+    WHERE e2.COMM IS NULL OR e2.COMM > e1.COMM * 10
+  ) AS exists_flag
+FROM EMP e1;
++---------+-------------+
+| COMM    | EXISTS_FLAG |
++---------+-------------+
+|    0.00 | true        |
+| 1400.00 | true        |
+|  300.00 | true        |
+|  500.00 | true        |
+|         | true        |
+|         | true        |
+|         | true        |
+|         | true        |
+|         | true        |
+|         | true        |
+|         | true        |
+|         | true        |
+|         | true        |
+|         | true        |
++---------+-------------+
+(14 rows)
+
+!ok
+
+SELECT
+  i1.COMM,
+  i1.COMM = ANY(SELECT COMM FROM EMP WHERE COMM=i1.COMM)
+FROM EMP i1 ORDER BY COMM;
++---------+--------+
+| COMM    | EXPR$1 |
++---------+--------+
+|    0.00 | true   |
+|  300.00 | true   |
+|  500.00 | true   |
+| 1400.00 | true   |
+|         |        |
+|         |        |
+|         |        |
+|         |        |
+|         |        |
+|         |        |
+|         |        |
+|         |        |
+|         |        |
+|         |        |
++---------+--------+
+(14 rows)
+
+!ok
+
 # Test case for [CALCITE-5789]
 select deptno from dept d1 where exists (
  select 1 from dept d2 where d2.deptno = d1.deptno and exists (

Reply via email to