This is an automated email from the ASF dual-hosted git repository.

zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new b04f744f7f [CALCITE-5132] Scalar IN subquery returns UNKNOWN instead 
of FALSE when key is partially NULL
b04f744f7f is described below

commit b04f744f7fbc6d5a4f12b2bb591be000265a88b2
Author: Zhen Chen <[email protected]>
AuthorDate: Thu Mar 5 06:43:08 2026 +0800

    [CALCITE-5132] Scalar IN subquery returns UNKNOWN instead of FALSE when key 
is partially NULL
---
 .../calcite/rel/rules/SubQueryRemoveRule.java      | 100 ++++++++++++--
 .../org/apache/calcite/test/JdbcAdapterTest.java   |  27 ++--
 core/src/test/resources/sql/sub-query.iq           | 147 ++++++++++++++++-----
 3 files changed, 213 insertions(+), 61 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java 
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index 9fa5f612ea..a1b59e952f 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -58,6 +58,7 @@
 import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 import static org.apache.calcite.util.Util.last;
 
@@ -588,7 +589,7 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
     //   when e.deptno is null then null       -- (2) key NULL check
     //   when dt.i is not null then true       -- (3) match found
     //   when ct.ck < ct.c then null           -- (4) NULLs exist in subquery
-    //   else false                             -- (5) no match
+    //   else false                            -- (5) no match
     //   end
     // from emp as e
     // left join (
@@ -602,7 +603,7 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
     // select e.deptno,
     //   case
     //   when dt.i is not null then true       -- (3) match found
-    //   else false                             -- (5) no match
+    //   else false                            -- (5) no match
     //   end
     // from emp as e
     // left join (select distinct deptno, true as i from emp) as dt
@@ -622,6 +623,43 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
     // from emp as e
     // inner join (select distinct deptno from emp) as dt
     //   on e.deptno = dt.deptno
+    //
+    // For multi-column IN where at least one key or RHS column is nullable, a
+    // single wildcard LEFT JOIN handles both exact and partial-null matches,
+    // while all-null RHS rows are excluded from dt and detected via ct.ck < 
ct.c:
+    //
+    // select e.empno, (e.empno, e.comm) in (select empno, comm from emp)
+    // from emp as e
+    //
+    // becomes
+    //
+    // select e.empno,
+    //   case
+    //   when ct.c = 0 then false                     -- (1) empty subquery 
check
+    //   when e.comm is null then null                -- (2) nullable key NULL 
check
+    //   when dt.i is not null and dt.em then true    -- (3) exact match (all 
cols non-null)
+    //   when dt.i is not null then null              -- (4) partial-null 
match (UNKNOWN)
+    //   when ct.ck < ct.c then null                  -- (5) all-null row 
exists (UNKNOWN)
+    //   else false                                   -- (6) no match
+    //   end
+    // from emp as e
+    // inner join (
+    //   select count(*) as c,
+    //          count(*) filter (where not (empno is null and comm is null)) 
as ck
+    //   from emp) as ct on true
+    // left join (
+    //   select empno, comm, true as i,
+    //          max(empno is not null and comm is not null) as em
+    //   from emp
+    //   where empno is not null or comm is not null  -- all-null rows excluded
+    //   group by empno, comm) as dt
+    //   on (e.empno = dt.empno or dt.empno is null)  -- wildcard per-column 
condition
+    //   and (e.comm = dt.comm or dt.comm is null)
+    //
+    // All-null rows (empno IS NULL AND comm IS NULL) are excluded from dt 
because
+    // the wildcard condition matches every LHS key, causing duplicate join 
output
+    // rows for LHS keys that also have exact group matches. They are instead
+    // caught by the global ct.ck < ct.c check (branch 5).
 
     builder.push(e.rel);
     final List<RexNode> fields = new ArrayList<>(builder.fields());
@@ -668,6 +706,7 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
     final RexLiteral unknownLiteral =
         builder.getRexBuilder().makeNullLiteral(trueLiteral.getType());
     boolean needsNullSafety = false;
+    boolean needsNullRowJoin = false;
     if (allLiterals) {
       final List<RexNode> conditions =
           Pair.zip(expressionOperands, fields).stream()
@@ -724,6 +763,7 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
           (logic == RelOptUtil.Logic.TRUE_FALSE_UNKNOWN
            || logic == RelOptUtil.Logic.UNKNOWN_AS_TRUE)
           && (!keyIsNulls.isEmpty() || anyFieldNullable);
+      needsNullRowJoin = needsNullSafety && fields.size() > 1;
 
       switch (logic) {
       case TRUE:
@@ -762,8 +802,21 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
         }
         // fall through
       default:
-        builder.aggregate(builder.groupKey(fields),
-            builder.literalAgg(true).as("i"));
+        if (needsNullRowJoin) {
+          // Exclude all-null rows from dt (they are detected by ct.ck < ct.c 
instead).
+          // Add em (exact-match) column to distinguish exact groups from 
partial-null groups.
+          List<RexNode> anyFieldNotNull =
+              
fields.stream().map(builder::isNotNull).collect(Collectors.toList());
+          builder.filter(builder.or(anyFieldNotNull));
+          RexNode allNotNull =
+              
builder.and(fields.stream().map(builder::isNotNull).collect(Collectors.toList()));
+          builder.aggregate(builder.groupKey(fields),
+              builder.literalAgg(true).as("i"),
+              builder.max(allNotNull).as("em"));
+        } else {
+          builder.aggregate(builder.groupKey(fields),
+              builder.literalAgg(true).as("i"));
+        }
       }
     }
 
@@ -773,10 +826,22 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
     }
     builder.as(dtAlias);
     int refOffset = offset;
-    final List<RexNode> conditions =
-        Pair.zip(expressionOperands, builder.fields()).stream()
-            .map(pair -> builder.equals(pair.left, RexUtil.shift(pair.right, 
refOffset)))
-            .collect(Collectors.toList());
+    final List<RexNode> conditions;
+    if (needsNullRowJoin) {
+      // Per-column wildcard condition: (key = col OR col IS NULL).
+      final List<RexNode> dtFields = builder.fields();
+      conditions = IntStream.range(0, expressionOperands.size())
+          .mapToObj(k -> {
+            RexNode col = RexUtil.shift(dtFields.get(k), refOffset);
+            return builder.or(builder.equals(expressionOperands.get(k), col),
+                builder.isNull(col));
+          })
+          .collect(Collectors.toList());
+    } else {
+      conditions = Pair.zip(expressionOperands, builder.fields()).stream()
+          .map(pair -> builder.equals(pair.left, RexUtil.shift(pair.right, 
refOffset)))
+          .collect(Collectors.toList());
+    }
     switch (logic) {
     case TRUE:
       builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet);
@@ -826,16 +891,29 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
       operands.add(builder.isNotNull(builder.field(dtAlias, "cs")),
           trueLiteral);
     } else {
-      operands.add(builder.isNotNull(last(builder.fields())),
-          trueLiteral);
+      if (needsNullRowJoin) {
+        // em=true: exact match (all RHS cols non-null) → TRUE.
+        operands.add(
+            builder.and(
+                ImmutableList.of(
+                builder.isNotNull(builder.field(dtAlias, "i")),
+                builder.call(SqlStdOperatorTable.IS_TRUE,
+                    builder.field(dtAlias, "em")))),
+            trueLiteral);
+        // em=false: partial-null match → UNKNOWN.
+        operands.add(builder.isNotNull(builder.field(dtAlias, "i")), b);
+      } else {
+        operands.add(builder.isNotNull(builder.field(dtAlias, "i")),
+            trueLiteral);
+      }
     }
 
     if (!allLiterals) {
       switch (logic) {
       case TRUE_FALSE_UNKNOWN:
       case UNKNOWN_AS_TRUE:
-        // only reference ctAlias if we created it
         if (needsNullSafety) {
+          // ct.ck < ct.c: RHS has a null (single-col) or all-null row 
(multi-col) → UNKNOWN.
           operands.add(
               builder.lessThan(builder.field(ctAlias, "ck"),
                   builder.field(ctAlias, "c")),
diff --git a/core/src/test/java/org/apache/calcite/test/JdbcAdapterTest.java 
b/core/src/test/java/org/apache/calcite/test/JdbcAdapterTest.java
index 98c8edff3f..8d8d66371c 100644
--- a/core/src/test/java/org/apache/calcite/test/JdbcAdapterTest.java
+++ b/core/src/test/java/org/apache/calcite/test/JdbcAdapterTest.java
@@ -274,23 +274,20 @@ class JdbcAdapterTest {
   @Test void testNotPushDownNotIn() {
     CalciteAssert.model(JdbcTest.SCOTT_MODEL)
         .query("select * from dept where (deptno, dname) not in (select 
deptno, ename from emp)")
-        .explainContains("PLAN=EnumerableCalc(expr#0..7=[{inputs}], 
expr#8=[0], "
-            + "expr#9=[=($t3, $t8)], expr#10=[IS NULL($t7)], expr#11=[>=($t4, 
$t3)], "
-            + "expr#12=[IS NOT NULL($t1)], expr#13=[AND($t10, $t11, $t12)], "
-            + "expr#14=[OR($t9, $t13)], proj#0..2=[{exprs}], 
$condition=[$t14])\n"
-            + "  EnumerableMergeJoin(condition=[AND(=($0, $5), =($1, $6))], 
joinType=[left])\n"
-            + "    EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], 
dir1=[ASC])\n"
-            + "      EnumerableNestedLoopJoin(condition=[true], 
joinType=[inner])\n"
+        .explainContains("EnumerableNestedLoopJoin(condition=[AND(OR(IS 
NULL($5),"
+            + " =($0, $5)), OR(IS NULL($6), =($1, $6)))], joinType=[left])\n"
+            + "    EnumerableNestedLoopJoin(condition=[true], 
joinType=[inner])\n"
+            + "      JdbcToEnumerableConverter\n"
+            + "        JdbcTableScan(table=[[SCOTT, DEPT]])\n"
+            + "      EnumerableAggregate(group=[{}], c=[COUNT()], ck=[COUNT() 
FILTER $0])\n"
             + "        JdbcToEnumerableConverter\n"
-            + "          JdbcTableScan(table=[[SCOTT, DEPT]])\n"
-            + "        EnumerableAggregate(group=[{}], c=[COUNT()], 
ck=[COUNT() FILTER $0])\n"
-            + "          JdbcToEnumerableConverter\n"
-            + "            JdbcProject($f2=[OR(IS NOT NULL($7), IS NOT 
NULL($1))])\n"
-            + "              JdbcTableScan(table=[[SCOTT, EMP]])\n"
+            + "          JdbcProject($f2=[OR(IS NOT NULL($7), IS NOT 
NULL($1))])\n"
+            + "            JdbcTableScan(table=[[SCOTT, EMP]])\n"
             + "    JdbcToEnumerableConverter\n"
-            + "      JdbcSort(sort0=[$0], sort1=[$1], dir0=[ASC], 
dir1=[ASC])\n"
-            + "        JdbcAggregate(group=[{0, 1}], i=[LITERAL_AGG(true)])\n"
-            + "          JdbcProject(DEPTNO=[$7], 
ENAME=[CAST($1):VARCHAR(14)])\n"
+            + "      JdbcAggregate(group=[{0, 1}], i=[LITERAL_AGG(true)], 
em=[MAX($2)])\n"
+            + "        JdbcProject(DEPTNO=[$7], ENAME=[CAST($1):VARCHAR(14)],"
+            + " $f2=[AND(IS NOT NULL($7), IS NOT NULL($1))])\n"
+            + "          JdbcFilter(condition=[OR(IS NOT NULL($7), IS NOT 
NULL($1))])\n"
             + "            JdbcTableScan(table=[[SCOTT, EMP]])\n\n");
   }
 
diff --git a/core/src/test/resources/sql/sub-query.iq 
b/core/src/test/resources/sql/sub-query.iq
index 2c47ab5d05..51cab9f01e 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -4313,16 +4313,16 @@ select * from "scott".emp where (empno, deptno) not in 
((1, 2), (3, null));
 
 !ok
 !if (use_old_decorr) {
-EnumerableCalc(expr#0..14=[{inputs}], expr#15=[0], expr#16=[=($t8, $t15)], 
expr#17=[IS NULL($t14)], expr#18=[>=($t9, $t8)], expr#19=[IS NOT NULL($t7)], 
expr#20=[AND($t17, $t18, $t19)], expr#21=[OR($t16, $t20)], proj#0..7=[{exprs}], 
$condition=[$t21])
-  EnumerableMergeJoin(condition=[AND(=($10, $12), =($11, $13))], 
joinType=[left])
-    EnumerableSort(sort0=[$10], sort1=[$11], dir0=[ASC], dir1=[ASC])
-      EnumerableCalc(expr#0..9=[{inputs}], expr#10=[CAST($t0):INTEGER NOT 
NULL], expr#11=[CAST($t7):INTEGER], proj#0..11=[{exprs}])
+EnumerableCalc(expr#0..14=[{inputs}], expr#15=[0], expr#16=[=($t8, $t15)], 
expr#17=[IS NULL($t7)], expr#18=[IS NOT NULL($t13)], expr#19=[AND($t14, $t18)], 
expr#20=[<($t9, $t8)], expr#21=[OR($t17, $t19, $t18, $t20)], expr#22=[IS NOT 
TRUE($t21)], expr#23=[OR($t16, $t22)], proj#0..7=[{exprs}], $condition=[$t23])
+  EnumerableMergeJoin(condition=[AND(=($10, $11), OR(IS NULL($12), 
=(CAST($7):INTEGER, $12)))], joinType=[left])
+    EnumerableSort(sort0=[$10], dir0=[ASC])
+      EnumerableCalc(expr#0..9=[{inputs}], expr#10=[CAST($t0):INTEGER NOT 
NULL], proj#0..10=[{exprs}])
         EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
           EnumerableTableScan(table=[[scott, EMP]])
           EnumerableAggregate(group=[{}], c=[COUNT()], ck=[COUNT() FILTER $0])
             EnumerableValues(tuples=[[{ true }, { true }]])
-    EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
-      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
+    EnumerableSort(sort0=[$0], dir0=[ASC])
+      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], expr#3=[IS NOT 
NULL($t1)], proj#0..3=[{exprs}])
         EnumerableValues(tuples=[[{ 3, null }, { 1, 2 }]])
 !plan
 !}
@@ -4337,18 +4337,15 @@ select * from "scott".emp where (mgr, deptno) not in 
((1, 2), (3, null), (cast(n
 
 !ok
 !if (use_old_decorr) {
-EnumerableCalc(expr#0..14=[{inputs}], expr#15=[0], expr#16=[=($t8, $t15)], 
expr#17=[IS NULL($t14)], expr#18=[>=($t9, $t8)], expr#19=[IS NOT NULL($t3)], 
expr#20=[IS NOT NULL($t7)], expr#21=[AND($t17, $t18, $t19, $t20)], 
expr#22=[OR($t16, $t21)], proj#0..7=[{exprs}], $condition=[$t22])
-  EnumerableMergeJoin(condition=[AND(=($10, $12), =($11, $13))], 
joinType=[left])
-    EnumerableSort(sort0=[$10], sort1=[$11], dir0=[ASC], dir1=[ASC])
-      EnumerableCalc(expr#0..9=[{inputs}], expr#10=[CAST($t3):INTEGER], 
expr#11=[CAST($t7):INTEGER], proj#0..11=[{exprs}])
-        EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
-          EnumerableTableScan(table=[[scott, EMP]])
-          EnumerableAggregate(group=[{}], c=[COUNT()], ck=[COUNT() FILTER $0])
-            EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t0)], 
expr#3=[IS NOT NULL($t1)], expr#4=[OR($t2, $t3)], $f2=[$t4])
-              EnumerableValues(tuples=[[{ 3, null }, { null, null }, { 1, 2 
}]])
-    EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
-      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
-        EnumerableValues(tuples=[[{ 3, null }, { null, null }, { 1, 2 }]])
+EnumerableCalc(expr#0..13=[{inputs}], expr#14=[0], expr#15=[=($t8, $t14)], 
expr#16=[IS NULL($t3)], expr#17=[IS NULL($t7)], expr#18=[IS NOT NULL($t12)], 
expr#19=[AND($t13, $t18)], expr#20=[<($t9, $t8)], expr#21=[OR($t16, $t17, $t19, 
$t18, $t20)], expr#22=[IS NOT TRUE($t21)], expr#23=[OR($t15, $t22)], 
proj#0..7=[{exprs}], $condition=[$t23])
+  EnumerableNestedLoopJoin(condition=[AND(OR(IS NULL($10), =(CAST($3):INTEGER, 
$10)), OR(IS NULL($11), =(CAST($7):INTEGER, $11)))], joinType=[left])
+    EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
+      EnumerableTableScan(table=[[scott, EMP]])
+      EnumerableAggregate(group=[{}], c=[COUNT()], ck=[COUNT() FILTER $0])
+        EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t0)], 
expr#3=[IS NOT NULL($t1)], expr#4=[OR($t2, $t3)], $f2=[$t4])
+          EnumerableValues(tuples=[[{ 3, null }, { null, null }, { 1, 2 }]])
+    EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], expr#3=[IS NOT 
NULL($t0)], expr#4=[IS NOT NULL($t1)], expr#5=[AND($t3, $t4)], expr#6=[OR($t3, 
$t4)], proj#0..2=[{exprs}], $f20=[$t5], $condition=[$t6])
+      EnumerableValues(tuples=[[{ 3, null }, { null, null }, { 1, 2 }]])
 !plan
 !}
 
@@ -4382,7 +4379,7 @@ select * from "scott".emp where (empno, deptno) not in 
((7369, 20), (7499, 30));
 
 !ok
 !if (use_old_decorr) {
-EnumerableCalc(expr#0..14=[{inputs}], expr#15=[0], expr#16=[=($t8, $t15)], 
expr#17=[IS NULL($t14)], expr#18=[>=($t9, $t8)], expr#19=[IS NOT NULL($t7)], 
expr#20=[AND($t17, $t18, $t19)], expr#21=[OR($t16, $t20)], proj#0..7=[{exprs}], 
$condition=[$t21])
+EnumerableCalc(expr#0..15=[{inputs}], expr#16=[0], expr#17=[=($t8, $t16)], 
expr#18=[IS NULL($t7)], expr#19=[IS NOT NULL($t14)], expr#20=[AND($t15, $t19)], 
expr#21=[<($t9, $t8)], expr#22=[OR($t18, $t20, $t19, $t21)], expr#23=[IS NOT 
TRUE($t22)], expr#24=[OR($t17, $t23)], proj#0..7=[{exprs}], $condition=[$t24])
   EnumerableMergeJoin(condition=[AND(=($10, $12), =($11, $13))], 
joinType=[left])
     EnumerableSort(sort0=[$10], sort1=[$11], dir0=[ASC], dir1=[ASC])
       EnumerableCalc(expr#0..9=[{inputs}], expr#10=[CAST($t0):INTEGER NOT 
NULL], expr#11=[CAST($t7):INTEGER], proj#0..11=[{exprs}])
@@ -4392,7 +4389,7 @@ EnumerableCalc(expr#0..14=[{inputs}], expr#15=[0], 
expr#16=[=($t8, $t15)], expr#
             EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], $f2=[$t2])
               EnumerableValues(tuples=[[{ 7369, 20 }, { 7499, 30 }]])
     EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
-      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
+      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}], 
$f20=[$t2])
         EnumerableValues(tuples=[[{ 7369, 20 }, { 7499, 30 }]])
 !plan
 !}
@@ -5256,7 +5253,7 @@ select comm, (comm, comm) in ((500, 500), (300, 300), (0, 
0)) from emp;
 !ok
 
 !if (use_old_decorr) {
-EnumerableCalc(expr#0..8=[{inputs}], expr#9=[IS NULL($t1)], 
expr#10=[null:BOOLEAN], expr#11=[0], expr#12=[<>($t2, $t11)], expr#13=[AND($t9, 
$t10, $t12)], expr#14=[IS NOT NULL($t8)], expr#15=[IS NOT NULL($t1)], 
expr#16=[AND($t14, $t12, $t15)], expr#17=[<($t3, $t2)], expr#18=[IS NULL($t8)], 
expr#19=[AND($t17, $t10, $t12, $t15, $t18)], expr#20=[OR($t13, $t16, $t19)], 
COMM=[$t1], EXPR$1=[$t20])
+EnumerableCalc(expr#0..9=[{inputs}], expr#10=[IS NULL($t1)], 
expr#11=[null:BOOLEAN], expr#12=[0], expr#13=[<>($t2, $t12)], 
expr#14=[AND($t10, $t11, $t13)], expr#15=[IS NOT NULL($t8)], expr#16=[AND($t9, 
$t15)], expr#17=[IS TRUE($t16)], expr#18=[IS NOT NULL($t1)], expr#19=[AND($t17, 
$t13, $t18)], expr#20=[<($t3, $t2)], expr#21=[OR($t15, $t20)], expr#22=[IS NOT 
TRUE($t16)], expr#23=[AND($t21, $t11, $t13, $t18, $t22)], expr#24=[OR($t14, 
$t19, $t23)], COMM=[$t1], EXPR$1=[$t24])
   EnumerableMergeJoin(condition=[AND(=($4, $6), =($5, $7))], joinType=[left])
     EnumerableSort(sort0=[$4], sort1=[$5], dir0=[ASC], dir1=[ASC])
       EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t1):DECIMAL(12, 2)], 
proj#0..4=[{exprs}], COMM1=[$t4])
@@ -5266,7 +5263,7 @@ EnumerableCalc(expr#0..8=[{inputs}], expr#9=[IS 
NULL($t1)], expr#10=[null:BOOLEA
           EnumerableAggregate(group=[{}], c=[COUNT()], ck=[COUNT() FILTER $0])
             EnumerableValues(tuples=[[{ true }, { true }, { true }]])
     EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
-      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
+      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}], 
$f20=[$t2])
         EnumerableValues(tuples=[[{ 500.00, 500.00 }, { 300.00, 300.00 }, { 
0.00, 0.00 }]])
 !plan
 !}
@@ -5296,19 +5293,16 @@ select comm, (comm, comm) in ((500, 500), (300, 300), 
(0, 0), (null , null)) fro
 
 !ok
 !if (use_old_decorr) {
-EnumerableCalc(expr#0..8=[{inputs}], expr#9=[IS NULL($t1)], 
expr#10=[null:BOOLEAN], expr#11=[0], expr#12=[<>($t2, $t11)], expr#13=[AND($t9, 
$t10, $t12)], expr#14=[IS NOT NULL($t8)], expr#15=[IS NOT NULL($t1)], 
expr#16=[AND($t14, $t12, $t15)], expr#17=[<($t3, $t2)], expr#18=[IS NULL($t8)], 
expr#19=[AND($t17, $t10, $t12, $t15, $t18)], expr#20=[OR($t13, $t16, $t19)], 
COMM=[$t1], EXPR$1=[$t20])
-  EnumerableMergeJoin(condition=[AND(=($4, $6), =($5, $7))], joinType=[left])
-    EnumerableSort(sort0=[$4], sort1=[$5], dir0=[ASC], dir1=[ASC])
-      EnumerableCalc(expr#0..3=[{inputs}], expr#4=[CAST($t1):DECIMAL(12, 2)], 
proj#0..4=[{exprs}], COMM1=[$t4])
-        EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
-          EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], COMM=[$t6])
-            EnumerableTableScan(table=[[scott, EMP]])
-          EnumerableAggregate(group=[{}], c=[COUNT()], ck=[COUNT() FILTER $0])
-            EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t0)], 
expr#3=[IS NOT NULL($t1)], expr#4=[OR($t2, $t3)], $f2=[$t4])
-              EnumerableValues(tuples=[[{ 500.00, 500.00 }, { 300.00, 300.00 
}, { 0.00, 0.00 }, { null, null }]])
-    EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
-      EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], proj#0..2=[{exprs}])
-        EnumerableValues(tuples=[[{ 500.00, 500.00 }, { 300.00, 300.00 }, { 
0.00, 0.00 }, { null, null }]])
+EnumerableCalc(expr#0..7=[{inputs}], expr#8=[IS NULL($t1)], 
expr#9=[null:BOOLEAN], expr#10=[0], expr#11=[<>($t2, $t10)], expr#12=[AND($t8, 
$t9, $t11)], expr#13=[IS NOT NULL($t6)], expr#14=[AND($t7, $t13)], expr#15=[IS 
TRUE($t14)], expr#16=[IS NOT NULL($t1)], expr#17=[AND($t15, $t11, $t16)], 
expr#18=[<($t3, $t2)], expr#19=[OR($t13, $t18)], expr#20=[IS NOT TRUE($t14)], 
expr#21=[AND($t19, $t9, $t11, $t16, $t20)], expr#22=[OR($t12, $t17, $t21)], 
COMM=[$t1], EXPR$1=[$t22])
+  EnumerableNestedLoopJoin(condition=[AND(OR(IS NULL($4), 
=(CAST($1):DECIMAL(12, 2), $4)), OR(IS NULL($5), =(CAST($1):DECIMAL(12, 2), 
$5)))], joinType=[left])
+    EnumerableNestedLoopJoin(condition=[true], joinType=[inner])
+      EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], COMM=[$t6])
+        EnumerableTableScan(table=[[scott, EMP]])
+      EnumerableAggregate(group=[{}], c=[COUNT()], ck=[COUNT() FILTER $0])
+        EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t0)], 
expr#3=[IS NOT NULL($t1)], expr#4=[OR($t2, $t3)], $f2=[$t4])
+          EnumerableValues(tuples=[[{ 500.00, 500.00 }, { 300.00, 300.00 }, { 
0.00, 0.00 }, { null, null }]])
+    EnumerableCalc(expr#0..1=[{inputs}], expr#2=[true], expr#3=[IS NOT 
NULL($t0)], expr#4=[IS NOT NULL($t1)], expr#5=[AND($t3, $t4)], expr#6=[OR($t3, 
$t4)], proj#0..2=[{exprs}], $f20=[$t5], $condition=[$t6])
+      EnumerableValues(tuples=[[{ 500.00, 500.00 }, { 300.00, 300.00 }, { 
0.00, 0.00 }, { null, null }]])
 !plan
 !}
 
@@ -9075,4 +9069,87 @@ where exists (
 (14 rows)
 
 !ok
+
+# [CALCITE-5132] Scalar IN subquery returns UNKNOWN instead of FALSE when key 
is partially NULL.
+# Case 1: Default insubquerythreshold=20
+!if (use_old_decorr) {
+select empno, deptno, (empno, deptno) in ((7521, null)) from "scott".emp;
++-------+--------+--------+
+| EMPNO | DEPTNO | EXPR$2 |
++-------+--------+--------+
+|  7369 |     20 | false  |
+|  7499 |     30 | false  |
+|  7521 |     30 |        |
+|  7566 |     20 | false  |
+|  7654 |     30 | false  |
+|  7698 |     30 | false  |
+|  7782 |     10 | false  |
+|  7788 |     20 | false  |
+|  7839 |     10 | false  |
+|  7844 |     30 | false  |
+|  7876 |     20 | false  |
+|  7900 |     30 | false  |
+|  7902 |     20 | false  |
+|  7934 |     10 | false  |
++-------+--------+--------+
+(14 rows)
+
+!ok
+
+select v,
+  row(v, 0) in (values (1, 0), (2, cast(null as integer))) as r
+from (values (1), (2), (3)) as t(v);
++---+-------+
+| V | R     |
++---+-------+
+| 1 | true  |
+| 2 |       |
+| 3 | false |
++---+-------+
+(3 rows)
+
+!ok
+
+# [CALCITE-5132] Scalar IN subquery returns UNKNOWN instead of FALSE when key 
is partially NULL.
+# Case 2: insubquerythreshold=0
+!set insubquerythreshold 0
+select empno, deptno, (empno, deptno) in ((7521, null)) from "scott".emp;
++-------+--------+--------+
+| EMPNO | DEPTNO | EXPR$2 |
++-------+--------+--------+
+|  7369 |     20 | false  |
+|  7499 |     30 | false  |
+|  7521 |     30 |        |
+|  7566 |     20 | false  |
+|  7654 |     30 | false  |
+|  7698 |     30 | false  |
+|  7782 |     10 | false  |
+|  7788 |     20 | false  |
+|  7839 |     10 | false  |
+|  7844 |     30 | false  |
+|  7876 |     20 | false  |
+|  7900 |     30 | false  |
+|  7902 |     20 | false  |
+|  7934 |     10 | false  |
++-------+--------+--------+
+(14 rows)
+
+!ok
+
+select v,
+  row(v, 0) in (values (1, 0), (2, cast(null as integer))) as r
+from (values (1), (2), (3)) as t(v);
++---+-------+
+| V | R     |
++---+-------+
+| 1 | true  |
+| 2 |       |
+| 3 | false |
++---+-------+
+(3 rows)
+
+!ok
+!set insubquerythreshold 20
+!}
+
 # End sub-query.iq

Reply via email to