This is an automated email from the ASF dual-hosted git repository.

mbudiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 393ccd1afd [CALCITE-6778] SOME rewrite for correlated queries does not 
handle null values correctly
393ccd1afd is described below

commit 393ccd1afddf57ab55440ea7ee5b98c9a0092483
Author: Rafael Acevedo <[email protected]>
AuthorDate: Mon Jan 13 10:20:48 2025 -0300

    [CALCITE-6778] SOME rewrite for correlated queries does not handle null 
values correctly
---
 .../calcite/rel/rules/SubQueryRemoveRule.java      | 23 +++++--
 core/src/test/resources/sql/sub-query.iq           | 77 +++++++++++++---------
 2 files changed, 62 insertions(+), 38 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java 
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index 4aa31a65f4..88065c61d5 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -392,21 +392,30 @@ public class SubQueryRemoveRule
         //     then false // sub-query is empty for corresponding corr value
         //   when q.c = 0 then false // sub-query is empty
         //   when e.deptno is null then unknown
-        //   when q.c <> q.d && q.d <= 1
+        //   when q.c <> q.d && q.dd <= 1
         //     then e.deptno != m || unknown
-        //   when q.d = 1
+        //   when q.dd = 1
         //     then e.deptno != m // sub-query has the distinct result
         //   else true
         //   end as v
         // from emp as e
         // left outer join (
-        //   select name, count(distinct *) as c, count(distinct deptno) as d,
+        //   select name, count(*) as c, count(deptno) as d, count(distinct 
deptno) as dd,
         //       max(deptno) as m, "alwaysTrue" as indicator
         //   from emp group by name) as q on e.name = q.name
+
+        // Additional details on the `q.c <> q.d && q.dd <= 1` clause:
+        // the q.c <> q.d comparison identifies if there are any null values,
+        // since count(*) counts null values and count(deptno) does not.
+        // if there's no null value, c should be equal to d.
+        // the q.dd <= 1 part means: true if there is at most one non-null 
value
+        // so this clause means:
+        // "if there are any null values and there is at most one non-null 
value".
         builder.push(e.rel)
             .aggregate(builder.groupKey(),
-                builder.count(true, "c"),
-                builder.count(true, "d", builder.field(0)),
+                builder.count(false, "c"),
+                builder.count(false, "d", builder.field(0)),
+                builder.count(true, "dd", builder.field(0)),
                 builder.max(builder.field(0)).as("m"));
 
         parentQueryFields.addAll(builder.fields());
@@ -423,12 +432,12 @@ public class SubQueryRemoveRule
                 literalUnknown,
                 builder.and(
                     builder.notEquals(builder.field("d"), builder.field("c")),
-                    builder.lessThanOrEqual(builder.field("d"),
+                    builder.lessThanOrEqual(builder.field("dd"),
                         builder.literal(1))),
                 builder.or(
                     builder.notEquals(e.operands.get(0), builder.field(qAlias, 
"m")),
                     literalUnknown),
-                builder.equals(builder.field("d"), builder.literal(1)),
+                builder.equals(builder.field("dd"), builder.literal(1)),
                 builder.notEquals(e.operands.get(0), builder.field(qAlias, 
"m")),
                 literalTrue);
         break;
diff --git a/core/src/test/resources/sql/sub-query.iq 
b/core/src/test/resources/sql/sub-query.iq
index 50bef74d92..45e3e72a1b 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -2370,11 +2370,11 @@ EnumerableCalc(expr#0..2=[{inputs}], ENAME=[$t1])
 select empno
 from "scott".emp emp1
 where empno <> some (select emp2.empno from "scott".emp emp2 where emp2.empno 
= emp1.empno);
-EnumerableCalc(expr#0..5=[{inputs}], expr#6=[<>($t2, $t1)], expr#7=[1], 
expr#8=[<=($t2, $t7)], expr#9=[<>($t0, $t3)], expr#10=[IS NULL($t4)], 
expr#11=[0], expr#12=[=($t1, $t11)], expr#13=[OR($t10, $t12)], expr#14=[IS NOT 
TRUE($t13)], expr#15=[AND($t6, $t8, $t9, $t14)], expr#16=[=($t2, $t7)], 
expr#17=[IS NOT NULL($t2)], expr#18=[AND($t6, $t17)], expr#19=[IS NOT 
TRUE($t18)], expr#20=[AND($t16, $t9, $t14, $t19)], expr#21=[AND($t6, $t8)], 
expr#22=[IS NOT TRUE($t21)], expr#23=[IS NOT TRUE($t1 [...]
-  EnumerableMergeJoin(condition=[=($0, $5)], joinType=[left])
+EnumerableCalc(expr#0..6=[{inputs}], expr#7=[<>($t2, $t1)], expr#8=[1], 
expr#9=[<=($t3, $t8)], expr#10=[<>($t0, $t4)], expr#11=[IS NULL($t5)], 
expr#12=[0], expr#13=[=($t1, $t12)], expr#14=[OR($t11, $t13)], expr#15=[IS NOT 
TRUE($t14)], expr#16=[AND($t7, $t9, $t10, $t15)], expr#17=[=($t3, $t8)], 
expr#18=[IS NOT NULL($t3)], expr#19=[AND($t7, $t18)], expr#20=[IS NOT 
TRUE($t19)], expr#21=[AND($t17, $t10, $t15, $t20)], expr#22=[AND($t7, $t9)], 
expr#23=[IS NOT TRUE($t22)], expr#24=[IS NOT TRUE( [...]
+  EnumerableMergeJoin(condition=[=($0, $6)], joinType=[left])
     EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0])
       EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1:BIGINT], expr#9=[true], 
c=[$t8], d=[$t8], m=[$t0], trueLiteral=[$t9], EMPNO1=[$t0])
+    EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1:BIGINT], expr#9=[true], 
c=[$t8], d=[$t8], dd=[$t8], m=[$t0], trueLiteral=[$t9], EMPNO1=[$t0])
       EnumerableTableScan(table=[[scott, EMP]])
 !plan
 +-------+
@@ -2414,15 +2414,15 @@ from "scott".emp emp1;
 select *
 from "scott".emp emp1
 where empno <> some (select comm from "scott".emp where deptno = emp1.deptno);
-EnumerableCalc(expr#0..12=[{inputs}], expr#13=[<>($t9, $t8)], expr#14=[1], 
expr#15=[<=($t9, $t14)], expr#16=[AND($t13, $t15)], expr#17=[=($t9, $t14)], 
expr#18=[OR($t16, $t17)], expr#19=[<>($t0, $t10)], expr#20=[IS NULL($t11)], 
expr#21=[0], expr#22=[=($t8, $t21)], expr#23=[OR($t20, $t22)], expr#24=[IS NOT 
TRUE($t23)], expr#25=[AND($t18, $t19, $t24)], expr#26=[IS NOT TRUE($t18)], 
expr#27=[AND($t24, $t26)], expr#28=[OR($t25, $t27)], proj#0..7=[{exprs}], 
$condition=[$t28])
-  EnumerableMergeJoin(condition=[=($7, $12)], joinType=[left])
+EnumerableCalc(expr#0..13=[{inputs}], expr#14=[<>($t9, $t8)], expr#15=[1], 
expr#16=[<=($t10, $t15)], expr#17=[AND($t14, $t16)], expr#18=[=($t10, $t15)], 
expr#19=[OR($t17, $t18)], expr#20=[<>($t0, $t11)], expr#21=[IS NULL($t12)], 
expr#22=[0], expr#23=[=($t8, $t22)], expr#24=[OR($t21, $t23)], expr#25=[IS NOT 
TRUE($t24)], expr#26=[AND($t19, $t20, $t25)], expr#27=[IS NOT TRUE($t19)], 
expr#28=[AND($t25, $t27)], expr#29=[OR($t26, $t28)], proj#0..7=[{exprs}], 
$condition=[$t29])
+  EnumerableMergeJoin(condition=[=($7, $13)], joinType=[left])
     EnumerableSort(sort0=[$7], dir0=[ASC])
       EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableSort(sort0=[$4], dir0=[ASC])
-      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], c=[$t1], d=[$t2], 
m=[$t3], trueLiteral=[$t4], DEPTNO=[$t0])
-        EnumerableAggregate(group=[{1}], c=[COUNT() FILTER $4], d=[COUNT($0) 
FILTER $3], m=[MIN($2) FILTER $4])
-          EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, 
$t4)], expr#6=[2], expr#7=[=($t3, $t6)], proj#0..2=[{exprs}], $g_0=[$t5], 
$g_2=[$t7])
-            EnumerableAggregate(group=[{6, 7}], groups=[[{6, 7}, {7}]], 
m=[MAX($6)], $g=[GROUPING($6, $7)])
+    EnumerableSort(sort0=[$5], dir0=[ASC])
+      EnumerableCalc(expr#0..4=[{inputs}], expr#5=[CAST($t1):BIGINT NOT NULL], 
expr#6=[CAST($t2):BIGINT NOT NULL], expr#7=[true], c=[$t5], d=[$t6], dd=[$t3], 
m=[$t4], trueLiteral=[$t7], DEPTNO=[$t0])
+        EnumerableAggregate(group=[{1}], c=[MIN($2) FILTER $6], d=[MIN($3) 
FILTER $6], dd=[COUNT($0) FILTER $5], m=[MIN($4) FILTER $6])
+          EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, 
$t6)], expr#8=[2], expr#9=[=($t5, $t8)], proj#0..4=[{exprs}], $g_0=[$t7], 
$g_2=[$t9])
+            EnumerableAggregate(group=[{6, 7}], groups=[[{6, 7}, {7}]], 
c=[COUNT()], d=[COUNT($6)], m=[MAX($6)], $g=[GROUPING($6, $7)])
               EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NOT NULL($t7)], 
proj#0..7=[{exprs}], $condition=[$t8])
                 EnumerableTableScan(table=[[scott, EMP]])
 !plan
@@ -2469,14 +2469,14 @@ from "scott".emp as emp1;
 select *
 from "scott".emp as emp1
 where empno <> some (select 2 from "scott".dept dept1 where dept1.deptno = 
emp1.empno);
-EnumerableCalc(expr#0..12=[{inputs}], expr#13=[<>($t9, $t8)], expr#14=[1], 
expr#15=[<=($t9, $t14)], expr#16=[<>($t0, $t10)], expr#17=[IS NULL($t11)], 
expr#18=[0], expr#19=[=($t8, $t18)], expr#20=[OR($t17, $t19)], expr#21=[IS NOT 
TRUE($t20)], expr#22=[AND($t13, $t15, $t16, $t21)], expr#23=[=($t9, $t14)], 
expr#24=[IS NOT NULL($t9)], expr#25=[AND($t13, $t24)], expr#26=[IS NOT 
TRUE($t25)], expr#27=[AND($t23, $t16, $t21, $t26)], expr#28=[AND($t13, $t15)], 
expr#29=[IS NOT TRUE($t28)], expr#30= [...]
-  EnumerableMergeJoin(condition=[=($0, $12)], joinType=[left])
+EnumerableCalc(expr#0..13=[{inputs}], expr#14=[<>($t9, $t8)], expr#15=[1], 
expr#16=[<=($t10, $t15)], expr#17=[<>($t0, $t11)], expr#18=[IS NULL($t12)], 
expr#19=[0], expr#20=[=($t8, $t19)], expr#21=[OR($t18, $t20)], expr#22=[IS NOT 
TRUE($t21)], expr#23=[AND($t14, $t16, $t17, $t22)], expr#24=[=($t10, $t15)], 
expr#25=[IS NOT NULL($t10)], expr#26=[AND($t14, $t25)], expr#27=[IS NOT 
TRUE($t26)], expr#28=[AND($t24, $t17, $t22, $t27)], expr#29=[AND($t14, $t16)], 
expr#30=[IS NOT TRUE($t29)], expr# [...]
+  EnumerableMergeJoin(condition=[=($0, $13)], joinType=[left])
     EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableSort(sort0=[$4], dir0=[ASC])
-      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t3):INTEGER NOT 
NULL], expr#5=[true], c=[$t1], d=[$t2], m=[$t4], trueLiteral=[$t5], 
DEPTNO0=[$t0])
-        EnumerableAggregate(group=[{0}], c=[COUNT() FILTER $4], d=[COUNT($1) 
FILTER $3], m=[MIN($2) FILTER $4])
-          EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, 
$t4)], expr#6=[1], expr#7=[=($t3, $t6)], proj#0..2=[{exprs}], $g_0=[$t5], 
$g_1=[$t7])
-            EnumerableAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}]], 
m=[MAX($1)], $g=[GROUPING($0, $1)])
+    EnumerableSort(sort0=[$5], dir0=[ASC])
+      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t1):BIGINT NOT NULL], 
expr#5=[CAST($t3):INTEGER NOT NULL], expr#6=[true], c=[$t4], d=[$t4], dd=[$t2], 
m=[$t5], trueLiteral=[$t6], DEPTNO0=[$t0])
+        EnumerableAggregate(group=[{0}], c=[MIN($2) FILTER $5], dd=[COUNT($1) 
FILTER $4], m=[MIN($3) FILTER $5])
+          EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, 
$t5)], expr#7=[1], expr#8=[=($t4, $t7)], proj#0..3=[{exprs}], $g_0=[$t6], 
$g_1=[$t8])
+            EnumerableAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}]], 
c=[COUNT()], m=[MAX($1)], $g=[GROUPING($0, $1)])
               EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT 
NOT NULL], expr#4=[2], DEPTNO0=[$t3], EXPR$0=[$t4])
                 EnumerableTableScan(table=[[scott, DEPT]])
 !plan
@@ -2517,14 +2517,14 @@ from "scott".emp as emp1;
 select *
 from "scott".emp as emp1
 where comm <> some (select 2 from "scott".dept dept1 where dept1.deptno = 
emp1.empno);
-EnumerableCalc(expr#0..12=[{inputs}], expr#13=[<>($t9, $t8)], expr#14=[1], 
expr#15=[<=($t9, $t14)], expr#16=[AND($t13, $t15)], expr#17=[=($t9, $t14)], 
expr#18=[OR($t16, $t17)], expr#19=[<>($t6, $t10)], expr#20=[IS NULL($t11)], 
expr#21=[IS NULL($t6)], expr#22=[0], expr#23=[=($t8, $t22)], expr#24=[OR($t20, 
$t21, $t23)], expr#25=[IS NOT TRUE($t24)], expr#26=[AND($t18, $t19, $t25)], 
expr#27=[IS NOT TRUE($t18)], expr#28=[AND($t25, $t27)], expr#29=[OR($t26, 
$t28)], proj#0..7=[{exprs}], $condit [...]
-  EnumerableMergeJoin(condition=[=($0, $12)], joinType=[left])
+EnumerableCalc(expr#0..13=[{inputs}], expr#14=[<>($t9, $t8)], expr#15=[1], 
expr#16=[<=($t10, $t15)], expr#17=[AND($t14, $t16)], expr#18=[=($t10, $t15)], 
expr#19=[OR($t17, $t18)], expr#20=[<>($t6, $t11)], expr#21=[IS NULL($t12)], 
expr#22=[IS NULL($t6)], expr#23=[0], expr#24=[=($t8, $t23)], expr#25=[OR($t21, 
$t22, $t24)], expr#26=[IS NOT TRUE($t25)], expr#27=[AND($t19, $t20, $t26)], 
expr#28=[IS NOT TRUE($t19)], expr#29=[AND($t26, $t28)], expr#30=[OR($t27, 
$t29)], proj#0..7=[{exprs}], $cond [...]
+  EnumerableMergeJoin(condition=[=($0, $13)], joinType=[left])
     EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableSort(sort0=[$4], dir0=[ASC])
-      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t3):INTEGER NOT 
NULL], expr#5=[true], c=[$t1], d=[$t2], m=[$t4], trueLiteral=[$t5], 
DEPTNO0=[$t0])
-        EnumerableAggregate(group=[{0}], c=[COUNT() FILTER $4], d=[COUNT($1) 
FILTER $3], m=[MIN($2) FILTER $4])
-          EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, 
$t4)], expr#6=[1], expr#7=[=($t3, $t6)], proj#0..2=[{exprs}], $g_0=[$t5], 
$g_1=[$t7])
-            EnumerableAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}]], 
m=[MAX($1)], $g=[GROUPING($0, $1)])
+    EnumerableSort(sort0=[$5], dir0=[ASC])
+      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t1):BIGINT NOT NULL], 
expr#5=[CAST($t3):INTEGER NOT NULL], expr#6=[true], c=[$t4], d=[$t4], dd=[$t2], 
m=[$t5], trueLiteral=[$t6], DEPTNO0=[$t0])
+        EnumerableAggregate(group=[{0}], c=[MIN($2) FILTER $5], dd=[COUNT($1) 
FILTER $4], m=[MIN($3) FILTER $5])
+          EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, 
$t5)], expr#7=[1], expr#8=[=($t4, $t7)], proj#0..3=[{exprs}], $g_0=[$t6], 
$g_1=[$t8])
+            EnumerableAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}]], 
c=[COUNT()], m=[MAX($1)], $g=[GROUPING($0, $1)])
               EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT 
NOT NULL], expr#4=[2], DEPTNO0=[$t3], EXPR$0=[$t4])
                 EnumerableTableScan(table=[[scott, DEPT]])
 !plan
@@ -2565,15 +2565,15 @@ from "scott".emp as emp1;
 select *
 from "scott".emp emp1
 where emp1.comm <> some (select comm from "scott".emp emp2 where emp2.sal = 
emp1.sal);
-EnumerableCalc(expr#0..12=[{inputs}], expr#13=[<>($t9, $t8)], expr#14=[1], 
expr#15=[<=($t9, $t14)], expr#16=[AND($t13, $t15)], expr#17=[=($t9, $t14)], 
expr#18=[OR($t16, $t17)], expr#19=[<>($t6, $t10)], expr#20=[IS NULL($t11)], 
expr#21=[IS NULL($t6)], expr#22=[0], expr#23=[=($t8, $t22)], expr#24=[OR($t20, 
$t21, $t23)], expr#25=[IS NOT TRUE($t24)], expr#26=[AND($t18, $t19, $t25)], 
expr#27=[IS NOT TRUE($t18)], expr#28=[AND($t25, $t27)], expr#29=[OR($t26, 
$t28)], proj#0..7=[{exprs}], $condit [...]
-  EnumerableMergeJoin(condition=[=($5, $12)], joinType=[left])
+EnumerableCalc(expr#0..13=[{inputs}], expr#14=[<>($t9, $t8)], expr#15=[1], 
expr#16=[<=($t10, $t15)], expr#17=[AND($t14, $t16)], expr#18=[=($t10, $t15)], 
expr#19=[OR($t17, $t18)], expr#20=[<>($t6, $t11)], expr#21=[IS NULL($t12)], 
expr#22=[IS NULL($t6)], expr#23=[0], expr#24=[=($t8, $t23)], expr#25=[OR($t21, 
$t22, $t24)], expr#26=[IS NOT TRUE($t25)], expr#27=[AND($t19, $t20, $t26)], 
expr#28=[IS NOT TRUE($t19)], expr#29=[AND($t26, $t28)], expr#30=[OR($t27, 
$t29)], proj#0..7=[{exprs}], $cond [...]
+  EnumerableMergeJoin(condition=[=($5, $13)], joinType=[left])
     EnumerableSort(sort0=[$5], dir0=[ASC])
       EnumerableTableScan(table=[[scott, EMP]])
-    EnumerableSort(sort0=[$4], dir0=[ASC])
-      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], c=[$t1], d=[$t2], 
m=[$t3], trueLiteral=[$t4], SAL=[$t0])
-        EnumerableAggregate(group=[{0}], c=[COUNT() FILTER $4], d=[COUNT($1) 
FILTER $3], m=[MIN($2) FILTER $4])
-          EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, 
$t4)], expr#6=[1], expr#7=[=($t3, $t6)], proj#0..2=[{exprs}], $g_0=[$t5], 
$g_1=[$t7])
-            EnumerableAggregate(group=[{5, 6}], groups=[[{5, 6}, {5}]], 
m=[MAX($6)], $g=[GROUPING($5, $6)])
+    EnumerableSort(sort0=[$5], dir0=[ASC])
+      EnumerableCalc(expr#0..4=[{inputs}], expr#5=[CAST($t1):BIGINT NOT NULL], 
expr#6=[CAST($t2):BIGINT NOT NULL], expr#7=[true], c=[$t5], d=[$t6], dd=[$t3], 
m=[$t4], trueLiteral=[$t7], SAL=[$t0])
+        EnumerableAggregate(group=[{0}], c=[MIN($2) FILTER $6], d=[MIN($3) 
FILTER $6], dd=[COUNT($1) FILTER $5], m=[MIN($4) FILTER $6])
+          EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t5, 
$t6)], expr#8=[1], expr#9=[=($t5, $t8)], proj#0..4=[{exprs}], $g_0=[$t7], 
$g_1=[$t9])
+            EnumerableAggregate(group=[{5, 6}], groups=[[{5, 6}, {5}]], 
c=[COUNT()], d=[COUNT($6)], m=[MAX($6)], $g=[GROUPING($5, $6)])
               EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NOT NULL($t5)], 
proj#0..7=[{exprs}], $condition=[$t8])
                 EnumerableTableScan(table=[[scott, EMP]])
 !plan
@@ -2612,6 +2612,21 @@ from "scott".emp emp1;
 
 !ok
 
+WITH tb as (select array(SELECT * FROM (VALUES (TRUE), (NULL)) as x(a)) as a)
+SELECT TRUE = ALL (
+    SELECT b
+    FROM UNNEST(a) AS x1(b)
+) AS test
+FROM tb;
++------+
+| TEST |
++------+
+|      |
++------+
+(1 row)
+
+!ok
+
 # [CALCITE-4486] UNIQUE predicate
 !use scott
 !set outputformat mysql

Reply via email to