(calcite) branch main updated: [CALCITE-7317] SubQueryRemoveRule should skip NULL-safety checks for IN subqueries when both the keys and the subquery columns are NOT NULL

asolimando Tue, 09 Dec 2025 08:14:46 -0800

This is an automated email from the ASF dual-hosted git repository.

asolimando pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git



The following commit(s) were added to refs/heads/main by this push:
     new 02bfbee0dd [CALCITE-7317] SubQueryRemoveRule should skip NULL-safety 
checks for IN subqueries when both the keys and the subquery columns are NOT 
NULL
02bfbee0dd is described below

commit 02bfbee0dd36f440aac3ec3a5250701250925e35
Author: Alessandro Solimando <[email protected]>
AuthorDate: Tue Dec 9 15:19:56 2025 +0100

    [CALCITE-7317] SubQueryRemoveRule should skip NULL-safety checks for IN 
subqueries when both the keys and the subquery columns are NOT NULL
---
 .../calcite/rel/rules/SubQueryRemoveRule.java      | 130 +++++++++--------
 .../org/apache/calcite/test/JdbcAdapterTest.java   |   4 +-
 .../org/apache/calcite/test/RelOptRulesTest.java   |  33 +++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 154 ++++++++++++++-------
 core/src/test/resources/sql/sub-query.iq           |  45 +++++-
 5 files changed, 260 insertions(+), 106 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java 
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index 6f13723170..8ed8c287b4 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -583,11 +583,11 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
     //
     // select e.deptno,
     //   case
-    //   when ct.c = 0 then false
-    //   when e.deptno is null then null
-    //   when dt.i is not null then true
-    //   when ct.ck < ct.c then null
-    //   else false
+    //   when ct.c = 0 then false              -- (1) empty subquery check
+    //   when e.deptno is null then null       -- (2) key NULL check
+    //   when dt.i is not null then true       -- (3) match found
+    //   when ct.ck < ct.c then null           -- (4) NULLs exist in subquery
+    //   else false                             -- (5) no match
     //   end
     // from emp as e
     // left join (
@@ -595,37 +595,32 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
     //   cross join (select distinct deptno, true as i from emp)) as dt
     //   on e.deptno = dt.deptno
     //
-    // If keys are not null we can remove "ct" and simplify to
+    // If both keys (e.deptno) and subquery columns (deptno) are NOT NULL,
+    // we can drop checks (1), (2), and (4), which eliminates the need for ct:
     //
     // select e.deptno,
     //   case
-    //   when dt.i is not null then true
-    //   else false
+    //   when dt.i is not null then true       -- (3) match found
+    //   else false                             -- (5) no match
     //   end
     // from emp as e
     // left join (select distinct deptno, true as i from emp) as dt
     //   on e.deptno = dt.deptno
     //
-    // We could further simplify to
-    //
-    // select e.deptno,
-    //   dt.i is not null
-    // from emp as e
-    // left join (select distinct deptno, true as i from emp) as dt
-    //   on e.deptno = dt.deptno
+    // Check (1) is not needed: if the subquery is empty, all dt.i are NULL,
+    // and the LEFT JOIN pattern correctly returns FALSE for IN (TRUE for NOT 
IN).
     //
-    // but have not yet.
+    // NULL-safety checks are required if either the keys or the subquery
+    // columns are nullable, due to SQL three-valued logic.
     //
-    // If the logic is TRUE we can just kill the record if the condition
-    // evaluates to FALSE or UNKNOWN. Thus the query simplifies to an inner
-    // join:
+    // If the logic is TRUE (as opposed to TRUE_FALSE_UNKNOWN), we only care 
about
+    // matches, so the query simplifies to an inner join regardless of 
nullability:
     //
     // select e.deptno,
     //   true
     // from emp as e
     // inner join (select distinct deptno from emp) as dt
     //   on e.deptno = dt.deptno
-    //
 
     builder.push(e.rel);
     final List<RexNode> fields = new ArrayList<>(builder.fields());
@@ -671,6 +666,7 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
     final RexLiteral falseLiteral = builder.literal(false);
     final RexLiteral unknownLiteral =
         builder.getRexBuilder().makeNullLiteral(trueLiteral.getType());
+    boolean needsNullSafety = false;
     if (allLiterals) {
       final List<RexNode> conditions =
           Pair.zip(expressionOperands, fields).stream()
@@ -718,39 +714,51 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
       expressionOperands.clear();
       fields.clear();
     } else {
+      boolean anyFieldNullable = fields.stream()
+          .anyMatch(field -> field.getType().isNullable());
+
+      // we can skip NULL-safety checks only if both keys
+      // and subquery columns are NOT NULL
+      needsNullSafety =
+          (logic == RelOptUtil.Logic.TRUE_FALSE_UNKNOWN
+           || logic == RelOptUtil.Logic.UNKNOWN_AS_TRUE)
+          && (!keyIsNulls.isEmpty() || anyFieldNullable);
+
       switch (logic) {
       case TRUE:
         builder.aggregate(builder.groupKey(fields));
         break;
       case TRUE_FALSE_UNKNOWN:
       case UNKNOWN_AS_TRUE:
-        // Builds the cross join
-        // Some databases don't support use FILTER clauses for aggregate 
functions
-        // like {@code COUNT(*) FILTER (WHERE not(a is null))}
-        // So use count(*) when only one column
-        if (builder.fields().size() <= 1) {
-          builder.aggregate(builder.groupKey(),
-              builder.count(false, "c"),
-              builder.count(builder.fields()).as("ck"));
-        } else {
-          builder.aggregate(builder.groupKey(),
-              builder.count(false, "c"),
-              builder.count()
-                  .filter(builder
-                      .not(builder
-                          .and(builder.fields().stream()
-                              .map(builder::isNull)
-                              .collect(Collectors.toList()))))
-                  .as("ck"));
-        }
-        builder.as(ctAlias);
-        if (!variablesSet.isEmpty()) {
-          builder.join(JoinRelType.LEFT, trueLiteral, variablesSet);
-        } else {
-          builder.join(JoinRelType.INNER, trueLiteral, variablesSet);
+        if (needsNullSafety) {
+          // Builds the cross join
+          // Some databases don't support use FILTER clauses for aggregate 
functions
+          // like {@code COUNT(*) FILTER (WHERE not(a is null))}
+          // So use count(*) when only one column
+          if (builder.fields().size() <= 1) {
+            builder.aggregate(builder.groupKey(),
+                builder.count(false, "c"),
+                builder.count(builder.fields()).as("ck"));
+          } else {
+            builder.aggregate(builder.groupKey(),
+                builder.count(false, "c"),
+                builder.count()
+                    .filter(builder
+                        .not(builder
+                            .and(builder.fields().stream()
+                                .map(builder::isNull)
+                                .collect(Collectors.toList()))))
+                    .as("ck"));
+          }
+          builder.as(ctAlias);
+          if (!variablesSet.isEmpty()) {
+            builder.join(JoinRelType.LEFT, trueLiteral, variablesSet);
+          } else {
+            builder.join(JoinRelType.INNER, trueLiteral, variablesSet);
+          }
+          offset += 2;
+          builder.push(e.rel);
         }
-        offset += 2;
-        builder.push(e.rel);
         // fall through
       default:
         builder.aggregate(builder.groupKey(fields),
@@ -797,9 +805,12 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
             builder.equals(builder.field(dtAlias, "cs"), falseLiteral),
             b);
       } else {
-        operands.add(
-            builder.equals(builder.field(ctAlias, "c"), builder.literal(0)),
-            falseLiteral);
+        // only reference ctAlias if we created it
+        if (needsNullSafety) {
+          operands.add(
+              builder.equals(builder.field(ctAlias, "c"), builder.literal(0)),
+              falseLiteral);
+        }
       }
       break;
     default:
@@ -822,17 +833,28 @@ private static RexNode rewriteIn(RexSubQuery e, 
Set<CorrelationId> variablesSet,
       switch (logic) {
       case TRUE_FALSE_UNKNOWN:
       case UNKNOWN_AS_TRUE:
-        operands.add(
-            builder.lessThan(builder.field(ctAlias, "ck"),
-                builder.field(ctAlias, "c")),
-            b);
+        // only reference ctAlias if we created it
+        if (needsNullSafety) {
+          operands.add(
+              builder.lessThan(builder.field(ctAlias, "ck"),
+                  builder.field(ctAlias, "c")),
+              b);
+        }
         break;
       default:
         break;
       }
     }
     operands.add(falseLiteral);
-    return builder.call(SqlStdOperatorTable.CASE, operands.build());
+    RexNode result = builder.call(SqlStdOperatorTable.CASE, operands.build());
+
+    // When we skip NULL-safety checks, the result might be NOT NULL
+    // but the original IN expression was nullable, so we need to preserve that
+    if (e.getType().isNullable() && !result.getType().isNullable()) {
+      result = builder.getRexBuilder().makeCast(e.getType(), result, false, 
false);
+    }
+
+    return result;
   }
 
   /** Returns a reference to a particular field, by offset, across several
diff --git a/core/src/test/java/org/apache/calcite/test/JdbcAdapterTest.java 
b/core/src/test/java/org/apache/calcite/test/JdbcAdapterTest.java
index 51e2aca144..a94cad903b 100644
--- a/core/src/test/java/org/apache/calcite/test/JdbcAdapterTest.java
+++ b/core/src/test/java/org/apache/calcite/test/JdbcAdapterTest.java
@@ -248,7 +248,7 @@ class JdbcAdapterTest {
         .query("select * from dept where deptno not in (select deptno from 
emp)")
         .explainContains("PLAN=JdbcToEnumerableConverter\n"
             + "  JdbcProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n"
-            + "    JdbcFilter(condition=[OR(=($3, 0), AND(IS NULL($6), >=($4, 
$3)))])\n"
+            + "    JdbcFilter(condition=[OR(AND(IS NULL($6), >=($4, $3)), 
=($3, 0))])\n"
             + "      JdbcJoin(condition=[=($0, $5)], joinType=[left])\n"
             + "        JdbcJoin(condition=[true], joinType=[inner])\n"
             + "          JdbcTableScan(table=[[SCOTT, DEPT]])\n"
@@ -263,7 +263,7 @@ class JdbcAdapterTest {
             + "FROM \"SCOTT\".\"EMP\") AS \"t\"\n"
             + "LEFT JOIN (SELECT \"DEPTNO\", TRUE AS \"i\"\n"
             + "FROM \"SCOTT\".\"EMP\"\nGROUP BY \"DEPTNO\") AS \"t0\" ON 
\"DEPT\".\"DEPTNO\" = \"t0\".\"DEPTNO\"\n"
-            + "WHERE \"t\".\"c\" = 0 OR \"t0\".\"i\" IS NULL AND \"t\".\"ck\" 
>= \"t\".\"c\"");
+            + "WHERE \"t0\".\"i\" IS NULL AND \"t\".\"ck\" >= \"t\".\"c\" OR 
\"t\".\"c\" = 0");
   }
 
   @Test void testNotPushDownNotIn() {
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java 
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index ed3ffa32b0..33b72492f7 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -5621,6 +5621,39 @@ private void checkEmptyJoin(RelOptFixture f) {
     sql(sql).withSubQueryRules().check();
   }
 
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-7317";>[CALCITE-7317]
+   * SubQueryRemoveRule should skip NULL-safety checks for IN subqueries when
+   * both the keys and the subquery columns are NOT NULL</a>. */
+  @Test void testInOptimizationBothNotNull() {
+    final String sql = "select * from emp as e1\n"
+        + "where empno in (\n"
+        + "  select empno from emp e2)";
+    sql(sql).withSubQueryRules().check();
+  }
+
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-7317";>[CALCITE-7317]
+   * SubQueryRemoveRule should skip NULL-safety checks for IN subqueries when
+   * both the keys and the subquery columns are NOT NULL</a>. */
+  @Test void testNotInNullableSubqueryColumn() {
+    final String sql = "select * from empnullables as e1\n"
+        + "where coalesce(deptno, 0) not in (\n"
+        + "  select deptno from empnullables e2)";
+    sql(sql).withSubQueryRules().check();
+  }
+
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-7317";>[CALCITE-7317]
+   * SubQueryRemoveRule should skip NULL-safety checks for IN subqueries when
+   * both the keys and the subquery columns are NOT NULL</a>. */
+  @Test void testNotInNullableKey() {
+    final String sql = "select * from empnullables as e1\n"
+        + "where deptno not in (\n"
+        + "  select coalesce(deptno, 0) from empnullables e2)";
+    sql(sql).withSubQueryRules().check();
+  }
+
   @Test void testSomeWithGreaterThanNoRowSubQuery() {
     final String sql = "select * from dept as d\n"
         + "where deptno > some(\n"
diff --git 
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml 
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index df13f4c78a..ab4ba1281b 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -6363,6 +6363,33 @@ LogicalProject(ENAME=[$26], NAME=[$16])
           LogicalJoin(condition=[=($9, $1)], joinType=[inner])
             LogicalTableScan(table=[[CATALOG, SALES, EMP]])
             LogicalTableScan(table=[[CATALOG, SALES, BONUS]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testInOptimizationBothNotNull">
+    <Resource name="sql">
+      <![CDATA[select * from emp as e1
+where empno in (
+  select empno from emp e2)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IN($0, {
+LogicalProject(EMPNO=[$0])
+  LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+    LogicalJoin(condition=[=($0, $9)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalProject(EMPNO=[$0])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
     </Resource>
   </TestCase>
@@ -8493,14 +8520,10 @@ LogicalProject(DEPTNO=[$0])
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(EMPNO=[$0])
-  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$13], NAME=[$14])
-    LogicalJoin(condition=[OR(=($9, 0), AND(IS NULL($12), >=($10, $9)))], 
joinType=[inner])
-      LogicalJoin(condition=[=($7, $11)], joinType=[left])
-        LogicalJoin(condition=[true], joinType=[inner])
-          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-          LogicalProject(c=[$0], ck=[$0])
-            LogicalAggregate(group=[{}], c=[COUNT()])
-              LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[$11], NAME=[$12])
+    LogicalJoin(condition=[IS NULL($10)], joinType=[inner])
+      LogicalJoin(condition=[=($7, $9)], joinType=[left])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
         LogicalProject(DEPTNO=[$0], i=[true])
           LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
       LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
@@ -10705,6 +10728,72 @@ LogicalProject(USER=[USER])
 LogicalAggregate(group=[{0}], EXPR$1=[SUM($1)])
   LogicalProject(NAME=[$1], DEPTNO=[$0])
     LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testNotInNullableKey">
+    <Resource name="sql">
+      <![CDATA[select * from empnullables as e1
+where deptno not in (
+  select coalesce(deptno, 0) from empnullables e2)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[NOT(IN($7, {
+LogicalProject(EXPR$0=[CASE(IS NOT NULL($7), CAST($7):INTEGER NOT NULL, 0)])
+  LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+}))])
+    LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+    LogicalFilter(condition=[OR(=($9, 0), AND(IS NULL($12), >=($10, $9), IS 
NOT NULL($7)))])
+      LogicalJoin(condition=[=($7, $11)], joinType=[left])
+        LogicalJoin(condition=[true], joinType=[inner])
+          LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+          LogicalProject(c=[$0], ck=[$0])
+            LogicalAggregate(group=[{}], c=[COUNT()])
+              LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+        LogicalAggregate(group=[{0}], i=[LITERAL_AGG(true)])
+          LogicalProject(EXPR$0=[CASE(IS NOT NULL($7), CAST($7):INTEGER NOT 
NULL, 0)])
+            LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testNotInNullableSubqueryColumn">
+    <Resource name="sql">
+      <![CDATA[select * from empnullables as e1
+where coalesce(deptno, 0) not in (
+  select deptno from empnullables e2)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[NOT(IN(CASE(IS NOT NULL($7), CAST($7):INTEGER NOT 
NULL, 0), {
+LogicalProject(DEPTNO=[$7])
+  LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+}))])
+    LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+    LogicalFilter(condition=[NOT(AND(OR(IS NOT NULL($12), <($10, $9)), <>($9, 
0)))])
+      LogicalJoin(condition=[=(CASE(IS NOT NULL($7), CAST($7):INTEGER NOT 
NULL, 0), $11)], joinType=[left])
+        LogicalJoin(condition=[true], joinType=[inner])
+          LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+          LogicalAggregate(group=[{}], c=[COUNT()], ck=[COUNT($0)])
+            LogicalProject(DEPTNO=[$7])
+              LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
+        LogicalAggregate(group=[{0}], i=[LITERAL_AGG(true)])
+          LogicalProject(DEPTNO=[$7])
+            LogicalTableScan(table=[[CATALOG, SALES, EMPNULLABLES]])
 ]]>
     </Resource>
   </TestCase>
@@ -20921,17 +21010,9 @@ LogicalProject(DEPTNO=[$0])
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(SAL=[$5])
-  LogicalFilter(condition=[OR(=($9, 0), IS NOT TRUE(OR(IS NOT NULL($13), 
<($10, $9))))])
-    LogicalJoin(condition=[AND(=($0, $12), =($2, $14))], joinType=[left])
-      LogicalJoin(condition=[=($2, $11)], joinType=[left])
-        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-        LogicalProject(c=[CASE(IS NOT NULL($2), $2, 0)], ck=[CASE(IS NOT 
NULL($2), $2, 0)], NAME=[$0])
-          LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], 
joinType=[left])
-            LogicalAggregate(group=[{2}])
-              LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-            LogicalAggregate(group=[{0}], c=[COUNT()])
-              LogicalProject(NAME=[$1])
-                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+  LogicalFilter(condition=[IS NULL($10)])
+    LogicalJoin(condition=[AND(=($0, $9), =($2, $11))], joinType=[left])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
       LogicalProject(DEPTNO=[$0], i=[true], NAME=[$1])
         LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 ]]>
@@ -20940,14 +21021,9 @@ LogicalProject(SAL=[$5])
       <![CDATA[
 LogicalProject(SAL=[$5])
   LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
-    LogicalFilter(condition=[OR(=($9, 0), IS NOT TRUE(OR(IS NOT NULL($12), 
<($10, $9))))])
+    LogicalFilter(condition=[IS NULL($10)])
       LogicalCorrelate(correlation=[$cor0], joinType=[left], 
requiredColumns=[{0, 2}])
-        LogicalCorrelate(correlation=[$cor0], joinType=[left], 
requiredColumns=[{2}])
-          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-          LogicalProject(c=[$0], ck=[$0])
-            LogicalAggregate(group=[{}], c=[COUNT()])
-              LogicalFilter(condition=[=($cor0.JOB, $1)])
-                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
         LogicalFilter(condition=[=($cor0.EMPNO, $0)])
           LogicalProject(DEPTNO=[$0], i=[true])
             LogicalFilter(condition=[=($cor0.JOB, $1)])
@@ -20978,15 +21054,9 @@ LogicalProject(EMPNO=[$1])
       <![CDATA[
 LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
   LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
-    LogicalFilter(condition=[OR(=($9, 0), IS NOT TRUE(OR(IS NOT NULL($12), 
<($10, $9))))])
+    LogicalFilter(condition=[IS NULL($10)])
       LogicalCorrelate(correlation=[$cor0], joinType=[left], 
requiredColumns=[{0, 1}])
-        LogicalCorrelate(correlation=[$cor0], joinType=[left], 
requiredColumns=[{1}])
-          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-          LogicalProject(c=[$0], ck=[$0])
-            LogicalAggregate(group=[{}], c=[COUNT()])
-              LogicalFilter(condition=[AND(>($2, 2), =($cor0.ENAME, $0))])
-                LogicalProject(ENAME=[$1], EMPNO=[$0], R=[$5])
-                  LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
         LogicalFilter(condition=[=($cor0.EMPNO, $0)])
           LogicalProject(EMPNO=[$1], i=[true])
             LogicalFilter(condition=[AND(>($2, 2), =($cor0.ENAME, $0))])
@@ -20997,19 +21067,9 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], 
MGR=[$3], HIREDATE=[$4], SAL=[$
     <Resource name="planAfter">
       <![CDATA[
 LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], 
SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
-  LogicalFilter(condition=[OR(=($9, 0), IS NOT TRUE(OR(IS NOT NULL($13), 
<($10, $9))))])
-    LogicalJoin(condition=[AND(=($0, $12), =($1, $14))], joinType=[left])
-      LogicalJoin(condition=[=($1, $11)], joinType=[left])
-        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-        LogicalProject(c=[CASE(IS NOT NULL($2), $2, 0)], ck=[CASE(IS NOT 
NULL($2), $2, 0)], ENAME=[$0])
-          LogicalJoin(condition=[IS NOT DISTINCT FROM($0, $1)], 
joinType=[left])
-            LogicalAggregate(group=[{1}])
-              LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-            LogicalAggregate(group=[{0}], c=[COUNT()])
-              LogicalProject(ENAME=[$0])
-                LogicalFilter(condition=[>($2, 2)])
-                  LogicalProject(ENAME=[$1], EMPNO=[$0], R=[$5])
-                    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+  LogicalFilter(condition=[IS NULL($10)])
+    LogicalJoin(condition=[AND(=($0, $9), =($1, $11))], joinType=[left])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
       LogicalProject(EMPNO=[$1], i=[true], ENAME=[$0])
         LogicalFilter(condition=[>($2, 2)])
           LogicalProject(ENAME=[$1], EMPNO=[$0], R=[$5])
diff --git a/core/src/test/resources/sql/sub-query.iq 
b/core/src/test/resources/sql/sub-query.iq
index 2fa71122fb..59fda9fca2 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -2023,7 +2023,7 @@ select sal from "scott".emp
 (0 rows)
 
 !ok
-EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t3)], expr#5=[NOT($t2)], 
expr#6=[IS NOT NULL($t2)], expr#7=[OR($t5, $t6)], expr#8=[IS NOT TRUE($t7)], 
expr#9=[OR($t4, $t8)], SAL=[$t1], $condition=[$t9])
+EnumerableCalc(expr#0..3=[{inputs}], expr#4=[NOT($t2)], expr#5=[IS NOT 
NULL($t2)], expr#6=[OR($t4, $t5)], expr#7=[IS NOT TRUE($t6)], expr#8=[IS 
NULL($t3)], expr#9=[OR($t7, $t8)], SAL=[$t1], $condition=[$t9])
   EnumerableNestedLoopJoin(condition=[true], joinType=[left])
     EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5])
       EnumerableTableScan(table=[[scott, EMP]])
@@ -2128,7 +2128,7 @@ select sal from "scott".emp
 (0 rows)
 
 !ok
-EnumerableCalc(expr#0..3=[{inputs}], expr#4=[IS NULL($t3)], expr#5=[NOT($t2)], 
expr#6=[IS NOT NULL($t2)], expr#7=[OR($t5, $t6)], expr#8=[IS NOT TRUE($t7)], 
expr#9=[OR($t4, $t8)], SAL=[$t1], $condition=[$t9])
+EnumerableCalc(expr#0..3=[{inputs}], expr#4=[NOT($t2)], expr#5=[IS NOT 
NULL($t2)], expr#6=[OR($t4, $t5)], expr#7=[IS NOT TRUE($t6)], expr#8=[IS 
NULL($t3)], expr#9=[OR($t7, $t8)], SAL=[$t1], $condition=[$t9])
   EnumerableNestedLoopJoin(condition=[true], joinType=[left])
     EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], SAL=[$t5])
       EnumerableTableScan(table=[[scott, EMP]])
@@ -2610,6 +2610,44 @@ EnumerableCalc(expr#0..1=[{inputs}], EXPR$0=[$t1])
 
 !use scott
 
+# [CALCITE-7317] SubQueryRemoveRule should skip NULL-safety checks for IN 
subqueries when both keys and subquery columns are NOT NULL
+select * from emp as e1 where empno in (select empno from emp e2);
+ EMPNO | ENAME  | JOB       | MGR  | HIREDATE   | SAL     | COMM    | DEPTNO
+-------+--------+-----------+------+------------+---------+---------+--------
+  7369 | SMITH  | CLERK     | 7902 | 1980-12-17 |  800.00 |         |     20
+  7499 | ALLEN  | SALESMAN  | 7698 | 1981-02-20 | 1600.00 |  300.00 |     30
+  7521 | WARD   | SALESMAN  | 7698 | 1981-02-22 | 1250.00 |  500.00 |     30
+  7566 | JONES  | MANAGER   | 7839 | 1981-02-04 | 2975.00 |         |     20
+  7654 | MARTIN | SALESMAN  | 7698 | 1981-09-28 | 1250.00 | 1400.00 |     30
+  7698 | BLAKE  | MANAGER   | 7839 | 1981-01-05 | 2850.00 |         |     30
+  7782 | CLARK  | MANAGER   | 7839 | 1981-06-09 | 2450.00 |         |     10
+  7788 | SCOTT  | ANALYST   | 7566 | 1987-04-19 | 3000.00 |         |     20
+  7839 | KING   | PRESIDENT |      | 1981-11-17 | 5000.00 |         |     10
+  7844 | TURNER | SALESMAN  | 7698 | 1981-09-08 | 1500.00 |    0.00 |     30
+  7876 | ADAMS  | CLERK     | 7788 | 1987-05-23 | 1100.00 |         |     20
+  7900 | JAMES  | CLERK     | 7698 | 1981-12-03 |  950.00 |         |     30
+  7902 | FORD   | ANALYST   | 7566 | 1981-12-03 | 3000.00 |         |     20
+  7934 | MILLER | CLERK     | 7782 | 1982-01-23 | 1300.00 |         |     10
+(14 rows)
+
+!ok
+
+# [CALCITE-7317] SubQueryRemoveRule should skip NULL-safety checks for IN 
subqueries when both keys and subquery columns are NOT NULL
+select * from emp as e1 where coalesce(deptno, 0) not in (select deptno from 
emp e2);
+ EMPNO | ENAME | JOB | MGR | HIREDATE | SAL | COMM | DEPTNO
+-------+-------+-----+-----+----------+-----+------+--------
+(0 rows)
+
+!ok
+
+# [CALCITE-7317] SubQueryRemoveRule should skip NULL-safety checks for IN 
subqueries when both keys and subquery columns are NOT NULL
+select * from emp as e1 where deptno not in (select coalesce(deptno, 0) from 
emp e2);
+ EMPNO | ENAME | JOB | MGR | HIREDATE | SAL | COMM | DEPTNO
+-------+-------+-----+-----+----------+-----+------+--------
+(0 rows)
+
+!ok
+
 # [CALCITE-1513] Correlated NOT IN query throws AssertionError
 select count(*) as c
 from "scott".emp as e
@@ -3920,7 +3958,7 @@ select * from "scott".emp where empno not in (null, 7782);
 
 !ok
 
-EnumerableCalc(expr#0..12=[{inputs}], expr#13=[0], expr#14=[=($t8, $t13)], 
expr#15=[IS NULL($t12)], expr#16=[>=($t9, $t8)], expr#17=[AND($t15, $t16)], 
expr#18=[OR($t14, $t17)], proj#0..7=[{exprs}], $condition=[$t18])
+EnumerableCalc(expr#0..12=[{inputs}], expr#13=[IS NULL($t12)], 
expr#14=[>=($t9, $t8)], expr#15=[AND($t13, $t14)], expr#16=[0], expr#17=[=($t8, 
$t16)], expr#18=[OR($t15, $t17)], proj#0..7=[{exprs}], $condition=[$t18])
   EnumerableMergeJoin(condition=[=($10, $11)], joinType=[left])
     EnumerableSort(sort0=[$10], dir0=[ASC])
       EnumerableCalc(expr#0..9=[{inputs}], expr#10=[CAST($t0):INTEGER NOT 
NULL], proj#0..10=[{exprs}])
@@ -5505,4 +5543,5 @@ WHERE EXISTS
 (2 rows)
 
 !ok
+
 # End sub-query.iq

(calcite) branch main updated: [CALCITE-7317] SubQueryRemoveRule should skip NULL-safety checks for IN subqueries when both the keys and the subquery columns are NOT NULL

Reply via email to