This is an automated email from the ASF dual-hosted git repository.

zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 15d79dbbbb [CALCITE-4765] Complex correlated EXISTS sub-queries used 
as scalar subqueries can return wrong results
15d79dbbbb is described below

commit 15d79dbbbbaee7ec283c4293baf6cb3c94664232
Author: Zhen Chen <[email protected]>
AuthorDate: Thu Jan 22 09:42:11 2026 +0800

    [CALCITE-4765] Complex correlated EXISTS sub-queries used as scalar 
subqueries can return wrong results
---
 .../apache/calcite/test/SqlToRelConverterTest.java |  23 +++++
 .../apache/calcite/test/SqlToRelConverterTest.xml  |  44 +++++++++
 core/src/test/resources/sql/blank.iq               | 102 +++++++++++++++++++++
 3 files changed, 169 insertions(+)

diff --git 
a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java 
b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java
index 28173513d8..f0d068d5bb 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java
@@ -2048,6 +2048,29 @@ void checkCorrelatedMapSubQuery(boolean expand) {
     sql(sql).withDecorrelate(true).withExpand(false).ok();
   }
 
+  /** Test case for <a 
href="https://issues.apache.org/jira/browse/CALCITE-4765";>[CALCITE-4765]
+   * Complex correlated EXISTS sub-queries used as scalar subqueries
+   * can return wrong results</a>. */
+  @Test void testExistsCorrelatedSubQuery() {
+    final String sql = "select * from emp e1 where exists (\n"
+        + "  select * from (\n"
+        + "    select e2.deptno from emp e2\n"
+        + "    where e2.comm = e1.comm) as table3\n"
+        + "  where table3.deptno <> e1.deptno)";
+    sql(sql).withDecorrelate(false).ok();
+  }
+
+  /** Test case for <a 
href="https://issues.apache.org/jira/browse/CALCITE-4765";>[CALCITE-4765]
+   * Complex correlated EXISTS sub-queries used as scalar subqueries
+   * can return wrong results</a>. */
+  @Test void testExistsCorrelatedSubQuery2() {
+    final String sql = "SELECT *, EXISTS(select * from (\n"
+        + "  SELECT e2.deptno FROM emp e2 where e1.comm = e2.comm) as table3\n"
+        + "  where table3.deptno <> e1.deptno)\n"
+        + "from emp e1";
+    sql(sql).withDecorrelate(false).ok();
+  }
+
   @Test void testExistsCorrelatedLimit() {
     final String sql = "select*from emp where exists (\n"
         + "  select 1 from dept where emp.deptno=dept.deptno limit 1)";
diff --git 
a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml 
b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
index bd829189cf..c8c136c97f 100644
--- a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
@@ -2241,6 +2241,50 @@ LogicalSort(fetch=[1])
       LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 })], variablesSet=[[$cor0]])
     LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testExistsCorrelatedSubQuery">
+    <Resource name="sql">
+      <![CDATA[select * from emp e1 where exists (
+  select * from (
+    select e2.deptno from emp e2
+    where e2.comm = e1.comm) as table3
+  where table3.deptno <> e1.deptno)]]>
+    </Resource>
+    <Resource name="plan">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor1], joinType=[left], 
requiredColumns=[{6, 7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalFilter(condition=[<>($0, $cor1.DEPTNO)])
+            LogicalProject(DEPTNO=[$7])
+              LogicalFilter(condition=[=($6, $cor1.COMM)])
+                LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testExistsCorrelatedSubQuery2">
+    <Resource name="sql">
+      <![CDATA[SELECT *, EXISTS(select * from (
+  SELECT e2.deptno FROM emp e2 where e1.comm = e2.comm) as table3
+  where table3.deptno <> e1.deptno)
+from emp e1]]>
+    </Resource>
+    <Resource name="plan">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], EXPR$9=[IS NOT NULL($9)])
+  LogicalCorrelate(correlation=[$cor1], joinType=[left], requiredColumns=[{6, 
7}])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+      LogicalProject($f0=[true])
+        LogicalFilter(condition=[<>($0, $cor1.DEPTNO)])
+          LogicalProject(DEPTNO=[$7])
+            LogicalFilter(condition=[=($cor1.COMM, $6)])
+              LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
     </Resource>
   </TestCase>
diff --git a/core/src/test/resources/sql/blank.iq 
b/core/src/test/resources/sql/blank.iq
index c92dc0f095..a84952e7f9 100644
--- a/core/src/test/resources/sql/blank.iq
+++ b/core/src/test/resources/sql/blank.iq
@@ -294,4 +294,106 @@ FROM "EMP";
 
 !ok
 
+# [CALCITE-4765] Complex correlated EXISTS sub-queries used as scalar 
subqueries can return wrong results
+CREATE TABLE tmp_emps (
+  empid INTEGER NOT NULL,
+  deptno INTEGER NOT NULL,
+  name VARCHAR(10) NOT NULL,
+  salary DECIMAL(10, 2) NOT NULL,
+  commission INTEGER);
+(0 rows modified)
+
+!update
+INSERT INTO tmp_emps (empid, deptno, name, salary, commission) VALUES
+(100, 10, 'Bill', 10000.00, 1000),
+(200, 20, 'Eric', 8000.00, 500),
+(150, 10, 'Sebastian', 7000.00, NULL),
+(110, 10, 'Theodore', 11500.00, 250),
+(170, 30, 'Theodore', 11500.00, 250),
+(140, 10, 'Sebastian', 7000.00, NULL);
+(6 rows modified)
+
+!update
+select * from tmp_emps e1 where EXISTS(select * from (
+  select e2.deptno from tmp_emps e2
+  where e2.commission = e1.commission) as table3
+where  table3.deptno <> e1.deptno);
++-------+--------+----------+----------+------------+
+| EMPID | DEPTNO | NAME     | SALARY   | COMMISSION |
++-------+--------+----------+----------+------------+
+|   110 |     10 | Theodore | 11500.00 |        250 |
+|   170 |     30 | Theodore | 11500.00 |        250 |
++-------+--------+----------+----------+------------+
+(2 rows)
+
+!ok
+!if (use_old_decorr) {
+EnumerableHashJoin(condition=[AND(=($1, $5), =($4, $7))], joinType=[semi])
+  EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+  EnumerableNestedLoopJoin(condition=[<>($1, $0)], joinType=[inner])
+    EnumerableAggregate(group=[{1}])
+      EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+    EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t4)], 
DEPTNO=[$t1], COMMISSION=[$t4], $condition=[$t5])
+      EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+!plan
+!}
+
+!if (use_new_decorr) {
+EnumerableHashJoin(condition=[AND(IS NOT DISTINCT FROM($1, $5), IS NOT 
DISTINCT FROM($4, $6))], joinType=[semi])
+  EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+  EnumerableCalc(expr#0..3=[{inputs}], proj#0..1=[{exprs}])
+    EnumerableHashJoin(condition=[AND(=($1, $3), <>($2, $0))], 
joinType=[inner])
+      EnumerableAggregate(group=[{1, 4}])
+        EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+      EnumerableCalc(expr#0..4=[{inputs}], DEPTNO=[$t1], COMMISSION=[$t4])
+        EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+!plan
+!}
+
+SELECT *, EXISTS(select * from (
+  SELECT e2.deptno FROM tmp_emps e2 where e1.commission = e2.commission) as 
table3
+  where table3.deptno <> e1.deptno)
+from tmp_emps e1;
++-------+--------+-----------+----------+------------+--------+
+| EMPID | DEPTNO | NAME      | SALARY   | COMMISSION | EXPR$5 |
++-------+--------+-----------+----------+------------+--------+
+|   100 |     10 | Bill      | 10000.00 |       1000 | false  |
+|   110 |     10 | Theodore  | 11500.00 |        250 | true   |
+|   140 |     10 | Sebastian |  7000.00 |            | false  |
+|   150 |     10 | Sebastian |  7000.00 |            | false  |
+|   170 |     30 | Theodore  | 11500.00 |        250 | true   |
+|   200 |     20 | Eric      |  8000.00 |        500 | false  |
++-------+--------+-----------+----------+------------+--------+
+(6 rows)
+
+!ok
+
+!if (use_old_decorr) {
+EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NOT NULL($t7)], 
proj#0..4=[{exprs}], EXPR$5=[$t8])
+  EnumerableMergeJoin(condition=[AND(=($1, $5), =($4, $6))], joinType=[left])
+    EnumerableSort(sort0=[$1], sort1=[$4], dir0=[ASC], dir1=[ASC])
+      EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+    EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], DEPTNO0=[$t1], 
COMMISSION=[$t0], $f2=[$t2])
+        EnumerableAggregate(group=[{1, 2}])
+          EnumerableNestedLoopJoin(condition=[<>($0, $2)], joinType=[inner])
+            EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t4)], 
DEPTNO=[$t1], COMMISSION=[$t4], $condition=[$t5])
+              EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+            EnumerableAggregate(group=[{1}])
+              EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+!plan
+!}
+
+!if (use_new_decorr) {
+EnumerableHashJoin(condition=[AND(IS NOT DISTINCT FROM($1, $5), IS NOT 
DISTINCT FROM($4, $6))], joinType=[left_mark])
+  EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+  EnumerableCalc(expr#0..3=[{inputs}], proj#0..1=[{exprs}])
+    EnumerableHashJoin(condition=[AND(=($1, $3), <>($2, $0))], 
joinType=[inner])
+      EnumerableAggregate(group=[{1, 4}])
+        EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+      EnumerableCalc(expr#0..4=[{inputs}], DEPTNO=[$t1], COMMISSION=[$t4])
+        EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+!plan
+!}
+
 # End blank.iq

Reply via email to