This is an automated email from the ASF dual-hosted git repository.
zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 15d79dbbbb [CALCITE-4765] Complex correlated EXISTS sub-queries used
as scalar subqueries can return wrong results
15d79dbbbb is described below
commit 15d79dbbbbaee7ec283c4293baf6cb3c94664232
Author: Zhen Chen <[email protected]>
AuthorDate: Thu Jan 22 09:42:11 2026 +0800
[CALCITE-4765] Complex correlated EXISTS sub-queries used as scalar
subqueries can return wrong results
---
.../apache/calcite/test/SqlToRelConverterTest.java | 23 +++++
.../apache/calcite/test/SqlToRelConverterTest.xml | 44 +++++++++
core/src/test/resources/sql/blank.iq | 102 +++++++++++++++++++++
3 files changed, 169 insertions(+)
diff --git
a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java
b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java
index 28173513d8..f0d068d5bb 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java
@@ -2048,6 +2048,29 @@ void checkCorrelatedMapSubQuery(boolean expand) {
sql(sql).withDecorrelate(true).withExpand(false).ok();
}
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-4765">[CALCITE-4765]
+ * Complex correlated EXISTS sub-queries used as scalar subqueries
+ * can return wrong results</a>. */
+ @Test void testExistsCorrelatedSubQuery() {
+ final String sql = "select * from emp e1 where exists (\n"
+ + " select * from (\n"
+ + " select e2.deptno from emp e2\n"
+ + " where e2.comm = e1.comm) as table3\n"
+ + " where table3.deptno <> e1.deptno)";
+ sql(sql).withDecorrelate(false).ok();
+ }
+
+ /** Test case for <a
href="https://issues.apache.org/jira/browse/CALCITE-4765">[CALCITE-4765]
+ * Complex correlated EXISTS sub-queries used as scalar subqueries
+ * can return wrong results</a>. */
+ @Test void testExistsCorrelatedSubQuery2() {
+ final String sql = "SELECT *, EXISTS(select * from (\n"
+ + " SELECT e2.deptno FROM emp e2 where e1.comm = e2.comm) as table3\n"
+ + " where table3.deptno <> e1.deptno)\n"
+ + "from emp e1";
+ sql(sql).withDecorrelate(false).ok();
+ }
+
@Test void testExistsCorrelatedLimit() {
final String sql = "select*from emp where exists (\n"
+ " select 1 from dept where emp.deptno=dept.deptno limit 1)";
diff --git
a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
index bd829189cf..c8c136c97f 100644
--- a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
@@ -2241,6 +2241,50 @@ LogicalSort(fetch=[1])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
})], variablesSet=[[$cor0]])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testExistsCorrelatedSubQuery">
+ <Resource name="sql">
+ <![CDATA[select * from emp e1 where exists (
+ select * from (
+ select e2.deptno from emp e2
+ where e2.comm = e1.comm) as table3
+ where table3.deptno <> e1.deptno)]]>
+ </Resource>
+ <Resource name="plan">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+ LogicalFilter(condition=[IS NOT NULL($9)])
+ LogicalCorrelate(correlation=[$cor1], joinType=[left],
requiredColumns=[{6, 7}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+ LogicalProject($f0=[true])
+ LogicalFilter(condition=[<>($0, $cor1.DEPTNO)])
+ LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[=($6, $cor1.COMM)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testExistsCorrelatedSubQuery2">
+ <Resource name="sql">
+ <![CDATA[SELECT *, EXISTS(select * from (
+ SELECT e2.deptno FROM emp e2 where e1.comm = e2.comm) as table3
+ where table3.deptno <> e1.deptno)
+from emp e1]]>
+ </Resource>
+ <Resource name="plan">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4],
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], EXPR$9=[IS NOT NULL($9)])
+ LogicalCorrelate(correlation=[$cor1], joinType=[left], requiredColumns=[{6,
7}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+ LogicalProject($f0=[true])
+ LogicalFilter(condition=[<>($0, $cor1.DEPTNO)])
+ LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[=($cor1.COMM, $6)])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
diff --git a/core/src/test/resources/sql/blank.iq
b/core/src/test/resources/sql/blank.iq
index c92dc0f095..a84952e7f9 100644
--- a/core/src/test/resources/sql/blank.iq
+++ b/core/src/test/resources/sql/blank.iq
@@ -294,4 +294,106 @@ FROM "EMP";
!ok
+# [CALCITE-4765] Complex correlated EXISTS sub-queries used as scalar
subqueries can return wrong results
+CREATE TABLE tmp_emps (
+ empid INTEGER NOT NULL,
+ deptno INTEGER NOT NULL,
+ name VARCHAR(10) NOT NULL,
+ salary DECIMAL(10, 2) NOT NULL,
+ commission INTEGER);
+(0 rows modified)
+
+!update
+INSERT INTO tmp_emps (empid, deptno, name, salary, commission) VALUES
+(100, 10, 'Bill', 10000.00, 1000),
+(200, 20, 'Eric', 8000.00, 500),
+(150, 10, 'Sebastian', 7000.00, NULL),
+(110, 10, 'Theodore', 11500.00, 250),
+(170, 30, 'Theodore', 11500.00, 250),
+(140, 10, 'Sebastian', 7000.00, NULL);
+(6 rows modified)
+
+!update
+select * from tmp_emps e1 where EXISTS(select * from (
+ select e2.deptno from tmp_emps e2
+ where e2.commission = e1.commission) as table3
+where table3.deptno <> e1.deptno);
++-------+--------+----------+----------+------------+
+| EMPID | DEPTNO | NAME | SALARY | COMMISSION |
++-------+--------+----------+----------+------------+
+| 110 | 10 | Theodore | 11500.00 | 250 |
+| 170 | 30 | Theodore | 11500.00 | 250 |
++-------+--------+----------+----------+------------+
+(2 rows)
+
+!ok
+!if (use_old_decorr) {
+EnumerableHashJoin(condition=[AND(=($1, $5), =($4, $7))], joinType=[semi])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+ EnumerableNestedLoopJoin(condition=[<>($1, $0)], joinType=[inner])
+ EnumerableAggregate(group=[{1}])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+ EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t4)],
DEPTNO=[$t1], COMMISSION=[$t4], $condition=[$t5])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+!plan
+!}
+
+!if (use_new_decorr) {
+EnumerableHashJoin(condition=[AND(IS NOT DISTINCT FROM($1, $5), IS NOT
DISTINCT FROM($4, $6))], joinType=[semi])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+ EnumerableCalc(expr#0..3=[{inputs}], proj#0..1=[{exprs}])
+ EnumerableHashJoin(condition=[AND(=($1, $3), <>($2, $0))],
joinType=[inner])
+ EnumerableAggregate(group=[{1, 4}])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+ EnumerableCalc(expr#0..4=[{inputs}], DEPTNO=[$t1], COMMISSION=[$t4])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+!plan
+!}
+
+SELECT *, EXISTS(select * from (
+ SELECT e2.deptno FROM tmp_emps e2 where e1.commission = e2.commission) as
table3
+ where table3.deptno <> e1.deptno)
+from tmp_emps e1;
++-------+--------+-----------+----------+------------+--------+
+| EMPID | DEPTNO | NAME | SALARY | COMMISSION | EXPR$5 |
++-------+--------+-----------+----------+------------+--------+
+| 100 | 10 | Bill | 10000.00 | 1000 | false |
+| 110 | 10 | Theodore | 11500.00 | 250 | true |
+| 140 | 10 | Sebastian | 7000.00 | | false |
+| 150 | 10 | Sebastian | 7000.00 | | false |
+| 170 | 30 | Theodore | 11500.00 | 250 | true |
+| 200 | 20 | Eric | 8000.00 | 500 | false |
++-------+--------+-----------+----------+------------+--------+
+(6 rows)
+
+!ok
+
+!if (use_old_decorr) {
+EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NOT NULL($t7)],
proj#0..4=[{exprs}], EXPR$5=[$t8])
+ EnumerableMergeJoin(condition=[AND(=($1, $5), =($4, $6))], joinType=[left])
+ EnumerableSort(sort0=[$1], sort1=[$4], dir0=[ASC], dir1=[ASC])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+ EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+ EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], DEPTNO0=[$t1],
COMMISSION=[$t0], $f2=[$t2])
+ EnumerableAggregate(group=[{1, 2}])
+ EnumerableNestedLoopJoin(condition=[<>($0, $2)], joinType=[inner])
+ EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NOT NULL($t4)],
DEPTNO=[$t1], COMMISSION=[$t4], $condition=[$t5])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+ EnumerableAggregate(group=[{1}])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+!plan
+!}
+
+!if (use_new_decorr) {
+EnumerableHashJoin(condition=[AND(IS NOT DISTINCT FROM($1, $5), IS NOT
DISTINCT FROM($4, $6))], joinType=[left_mark])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+ EnumerableCalc(expr#0..3=[{inputs}], proj#0..1=[{exprs}])
+ EnumerableHashJoin(condition=[AND(=($1, $3), <>($2, $0))],
joinType=[inner])
+ EnumerableAggregate(group=[{1, 4}])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+ EnumerableCalc(expr#0..4=[{inputs}], DEPTNO=[$t1], COMMISSION=[$t4])
+ EnumerableTableScan(table=[[BLANK, TMP_EMPS]])
+!plan
+!}
+
# End blank.iq