This is an automated email from the ASF dual-hosted git repository.

xiong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new f347fba2d2 [CALCITE-6650] Optimize the IN sub-query and SOME sub-query 
by Metadata RowCount
f347fba2d2 is described below

commit f347fba2d25d62c98fe3bb5f28bc9b508f7ca017
Author: Xiong Duan <[email protected]>
AuthorDate: Tue Oct 29 18:52:20 2024 +0800

    [CALCITE-6650] Optimize the IN sub-query and SOME sub-query by Metadata 
RowCount
---
 .../calcite/rel/rules/SubQueryRemoveRule.java      |  18 ++-
 .../org/apache/calcite/test/RelOptRulesTest.java   |  46 ++++++
 .../org/apache/calcite/test/RelOptRulesTest.xml    | 144 ++++++++++++++++++
 core/src/test/resources/sql/sub-query.iq           | 163 ++++++++++++++++++++-
 4 files changed, 368 insertions(+), 3 deletions(-)

diff --git 
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java 
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index 6feee83c76..3e9473f0b6 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -170,6 +170,13 @@ public class SubQueryRemoveRule
    */
   private static RexNode rewriteSome(RexSubQuery e, Set<CorrelationId> 
variablesSet,
       RelBuilder builder, int subQueryIndex) {
+    // If the sub-query is guaranteed to return 0 row, just return
+    // FALSE.
+    final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery();
+    final Double maxRowCount = mq.getMaxRowCount(e.rel);
+    if (maxRowCount != null && maxRowCount <= 0D) {
+      return builder.getRexBuilder().makeLiteral(Boolean.FALSE, e.getType(), 
true);
+    }
     // Most general case, where the left and right keys might have nulls, and
     // caller requires 3-valued logic return.
     //
@@ -459,11 +466,11 @@ public class SubQueryRemoveRule
     // TRUE.
     final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery();
     final Double minRowCount = mq.getMinRowCount(e.rel);
-    if (minRowCount != null && minRowCount >= 1D) {
+    if (minRowCount != null && minRowCount > 0D) {
       return builder.literal(true);
     }
     final Double maxRowCount = mq.getMaxRowCount(e.rel);
-    if (maxRowCount != null && maxRowCount < 1D) {
+    if (maxRowCount != null && maxRowCount <= 0D) {
       return builder.literal(false);
     }
     builder.push(e.rel);
@@ -555,6 +562,13 @@ public class SubQueryRemoveRule
    */
   private static RexNode rewriteIn(RexSubQuery e, Set<CorrelationId> 
variablesSet,
       RelOptUtil.Logic logic, RelBuilder builder, int offset, int 
subQueryIndex) {
+    // If the sub-query is guaranteed to return 0 row, just return
+    // FALSE.
+    final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery();
+    final Double maxRowCount = mq.getMaxRowCount(e.rel);
+    if (maxRowCount != null && maxRowCount <= 0D) {
+      return builder.getRexBuilder().makeLiteral(Boolean.FALSE, e.getType(), 
true);
+    }
     // Most general case, where the left and right keys might have nulls, and
     // caller requires 3-valued logic return.
     //
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java 
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 0454021eee..5fe12d9617 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -5049,6 +5049,52 @@ class RelOptRulesTest extends RelOptTestBase {
     sql(sql).withSubQueryRules().check();
   }
 
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-6650";>[CALCITE-6650]
+   * Optimize the IN sub-query and SOME sub-query
+   * by Metadata RowCount</a>. */
+  @Test void testInWithNoRowSubQuery() {
+    final String sql = "select * from dept as d\n"
+        + "where deptno in (\n"
+        + "  select deptno from emp e where false)";
+    sql(sql).withSubQueryRules().check();
+  }
+
+  @Test void testNotInWithNoRowSubQuery() {
+    final String sql = "select * from dept as d\n"
+        + "where deptno not in (\n"
+        + "  select deptno from emp e where false)";
+    sql(sql).withSubQueryRules().check();
+  }
+
+  @Test void testSomeWithGreaterThanNoRowSubQuery() {
+    final String sql = "select * from dept as d\n"
+        + "where deptno > some(\n"
+        + "  select deptno from emp e where false)";
+    sql(sql).withSubQueryRules().check();
+  }
+
+  @Test void testSomeWithLessThanOrEqualNoRowSubQuery() {
+    final String sql = "select * from dept as d\n"
+        + "where deptno <= some(\n"
+        + "  select deptno from emp e where false)";
+    sql(sql).withSubQueryRules().check();
+  }
+
+  @Test void testUniqueWithNoRowSubQuery() {
+    final String sql = "select * from dept as d\n"
+        + "where unique(\n"
+        + "  select deptno from emp e where false)";
+    sql(sql).withSubQueryRules().check();
+  }
+
+  @Test void testNotUniqueWithNoRowSubQuery() {
+    final String sql = "select * from dept as d\n"
+        + "where not unique(\n"
+        + "  select deptno from emp e where false)";
+    sql(sql).withSubQueryRules().check();
+  }
+
   /** Test case for
    * <a 
href="https://issues.apache.org/jira/browse/CALCITE-4848";>[CALCITE-4848]
    * Adding a HAVING condition to a query with a dynamic parameter makes the 
result always empty
diff --git 
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml 
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index dacda972b1..eba397a3b9 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -5081,6 +5081,30 @@ LogicalUnion(all=[true])
     LogicalTableScan(table=[[CATALOG, SALES, EMP]])
   LogicalProject(EXPR$0=[LOWER($1)])
     LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testInWithNoRowSubQuery">
+    <Resource name="sql">
+      <![CDATA[select * from dept as d
+where deptno in (
+  select deptno from emp e where false)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalFilter(condition=[IN($0, {
+LogicalProject(DEPTNO=[$7])
+  LogicalFilter(condition=[false])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalValues(tuples=[[]])
 ]]>
     </Resource>
   </TestCase>
@@ -7724,6 +7748,30 @@ LogicalProject(USER=[USER])
 LogicalAggregate(group=[{0}], EXPR$1=[SUM($1)])
   LogicalProject(NAME=[$1], DEPTNO=[$0])
     LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testNotInWithNoRowSubQuery">
+    <Resource name="sql">
+      <![CDATA[select * from dept as d
+where deptno not in (
+  select deptno from emp e where false)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalFilter(condition=[NOT(IN($0, {
+LogicalProject(DEPTNO=[$7])
+  LogicalFilter(condition=[false])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+}))])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 ]]>
     </Resource>
   </TestCase>
@@ -7762,6 +7810,30 @@ LogicalProject(NAME=[$1], DEPTNO=[$0], EXPR$2=[$2])
   LogicalFilter(condition=[=($0, 0)])
     LogicalWindow(window#0=[window(partition {1} aggs [COUNT()])])
       LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testNotUniqueWithNoRowSubQuery">
+    <Resource name="sql">
+      <![CDATA[select * from dept as d
+where not unique(
+  select deptno from emp e where false)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalFilter(condition=[NOT(UNIQUE({
+LogicalProject(DEPTNO=[$7])
+  LogicalFilter(condition=[false])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+}))])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalValues(tuples=[[]])
 ]]>
     </Resource>
   </TestCase>
@@ -14508,6 +14580,54 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], 
MGR=[$3], HIREDATE=[$4], SAL=[$
       LogicalAggregate(group=[{0}])
         LogicalProject(NAME=[$1])
           LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testSomeWithGreaterThanNoRowSubQuery">
+    <Resource name="sql">
+      <![CDATA[select * from dept as d
+where deptno > some(
+  select deptno from emp e where false)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalFilter(condition=[> SOME($0, {
+LogicalProject(DEPTNO=[$7])
+  LogicalFilter(condition=[false])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalValues(tuples=[[]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testSomeWithLessThanOrEqualNoRowSubQuery">
+    <Resource name="sql">
+      <![CDATA[select * from dept as d
+where deptno <= some(
+  select deptno from emp e where false)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalFilter(condition=[<= SOME($0, {
+LogicalProject(DEPTNO=[$7])
+  LogicalFilter(condition=[false])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalValues(tuples=[[]])
 ]]>
     </Resource>
   </TestCase>
@@ -16888,6 +17008,30 @@ LogicalUnion(all=[true])
     <Resource name="planAfter">
       <![CDATA[
 LogicalValues(tuples=[[{ 5 }, { 5 }]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testUniqueWithNoRowSubQuery">
+    <Resource name="sql">
+      <![CDATA[select * from dept as d
+where unique(
+  select deptno from emp e where false)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalFilter(condition=[UNIQUE({
+LogicalProject(DEPTNO=[$7])
+  LogicalFilter(condition=[false])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
 ]]>
     </Resource>
   </TestCase>
diff --git a/core/src/test/resources/sql/sub-query.iq 
b/core/src/test/resources/sql/sub-query.iq
index 382ad91ebd..f1adf623ad 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -163,7 +163,10 @@ select * from dept where deptno not in (select deptno from 
emp where false);
 (4 rows)
 
 !ok
-select deptno, deptno     in (select deptno from emp where false) from dept;
+EnumerableValues(tuples=[[{ 10, 'Sales      ' }, { 20, 'Marketing  ' }, { 30, 
'Engineering' }, { 40, 'Empty      ' }]])
+!plan
+
+select deptno, deptno in (select deptno from emp where false) from dept;
 +--------+--------+
 | DEPTNO | EXPR$1 |
 +--------+--------+
@@ -175,6 +178,10 @@ select deptno, deptno     in (select deptno from emp where 
false) from dept;
 (4 rows)
 
 !ok
+EnumerableCalc(expr#0..1=[{inputs}], expr#2=[false], 
expr#3=[CAST($t2):BOOLEAN], DEPTNO=[$t0], EXPR$1=[$t3])
+  EnumerableValues(tuples=[[{ 10, 'Sales      ' }, { 20, 'Marketing  ' }, { 
30, 'Engineering' }, { 40, 'Empty      ' }]])
+!plan
+
 select deptno, deptno not in (select deptno from emp where false) from dept;
 +--------+--------+
 | DEPTNO | EXPR$1 |
@@ -4238,4 +4245,158 @@ EnumerableCalc(expr#0..8=[{inputs}], 
proj#0..7=[{exprs}])
       EnumerableTableScan(table=[[scott, EMP]])
 !plan
 
+
+# [CALCITE-6650] Optimize the IN sub-query and SOME sub-query by Metadata 
RowCount
+
+!use post
+
+# Test case about SOME sub-query when sub-query return 0 row
+select * from emp where deptno > some(select deptno from dept where false);
++-------+--------+--------+
+| ENAME | DEPTNO | GENDER |
++-------+--------+--------+
++-------+--------+--------+
+(0 rows)
+
+!ok
+
+EnumerableValues(tuples=[[]])
+!plan
+
+# Same as previous; but is Scalar sub-query
+select deptno, deptno > some(select deptno from dept where false) from emp;
++--------+--------+
+| DEPTNO | EXPR$1 |
++--------+--------+
+|     10 | false  |
+|     20 | false  |
+|     30 | false  |
+|     10 | false  |
+|     30 | false  |
+|     50 | false  |
+|     50 | false  |
+|     60 | false  |
+|        | false  |
++--------+--------+
+(9 rows)
+
+!ok
+
+EnumerableCalc(expr#0=[{inputs}], expr#1=[false], expr#2=[CAST($t1):BOOLEAN], 
DEPTNO=[$t0], EXPR$1=[$t2])
+  EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, { 
50 }, { 60 }, { null }]])
+!plan
+
+# Test case about ANY sub-query when sub-query return 0 row
+select * from emp where deptno > any(select deptno from dept where false);
++-------+--------+--------+
+| ENAME | DEPTNO | GENDER |
++-------+--------+--------+
++-------+--------+--------+
+(0 rows)
+
+!ok
+
+EnumerableValues(tuples=[[]])
+!plan
+
+# Same as previous; but is Scalar sub-query
+select deptno, deptno > any(select deptno from dept where false) from emp;
++--------+--------+
+| DEPTNO | EXPR$1 |
++--------+--------+
+|     10 | false  |
+|     10 | false  |
+|     20 | false  |
+|     30 | false  |
+|     30 | false  |
+|     50 | false  |
+|     50 | false  |
+|     60 | false  |
+|        | false  |
++--------+--------+
+(9 rows)
+
+!ok
+
+EnumerableCalc(expr#0=[{inputs}], expr#1=[false], expr#2=[CAST($t1):BOOLEAN], 
DEPTNO=[$t0], EXPR$1=[$t2])
+  EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, { 
50 }, { 60 }, { null }]])
+!plan
+
+# Test case about UNIQUE sub-query when sub-query return 0 row
+select * from emp where unique (select deptno from dept where false);
++-------+--------+--------+
+| ENAME | DEPTNO | GENDER |
++-------+--------+--------+
+| Adam  |     50 | M      |
+| Alice |     30 | F      |
+| Bob   |     10 | M      |
+| Eric  |     20 | M      |
+| Eve   |     50 | F      |
+| Grace |     60 | F      |
+| Jane  |     10 | F      |
+| Susan |     30 | F      |
+| Wilma |        | F      |
++-------+--------+--------+
+(9 rows)
+
+!ok
+
+EnumerableValues(tuples=[[{ 'Jane ', 10, 'F' }, { 'Bob  ', 10, 'M' }, { 'Eric 
', 20, 'M' }, { 'Susan', 30, 'F' }, { 'Alice', 30, 'F' }, { 'Adam ', 50, 'M' }, 
{ 'Eve  ', 50, 'F' }, { 'Grace', 60, 'F' }, { 'Wilma', null, 'F' }]])
+!plan
+
+# Same as previous; but is Scalar sub-query
+select unique (select deptno from dept where false) from emp;
++--------+
+| EXPR$0 |
++--------+
+| true   |
+| true   |
+| true   |
+| true   |
+| true   |
+| true   |
+| true   |
+| true   |
+| true   |
++--------+
+(9 rows)
+
+!ok
+
+EnumerableCalc(expr#0=[{inputs}], expr#1=[true], EXPR$0=[$t1])
+  EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, { 
50 }, { 60 }, { null }]])
+!plan
+
+
+# Test case about NOT UNIQUE sub-query when sub-query return 0 row
+select * from emp where not unique (select deptno from dept where false);
++-------+--------+--------+
+| ENAME | DEPTNO | GENDER |
++-------+--------+--------+
++-------+--------+--------+
+(0 rows)
+
+!ok
+
+EnumerableValues(tuples=[[]])
+!plan
+
+# Same as previous; but is Scalar sub-query
+select not unique (select deptno from dept where false) from dept;
++--------+
+| EXPR$0 |
++--------+
+| false  |
+| false  |
+| false  |
+| false  |
++--------+
+(4 rows)
+
+!ok
+
+EnumerableCalc(expr#0..1=[{inputs}], expr#2=[false], EXPR$0=[$t2])
+  EnumerableValues(tuples=[[{ 10, 'Sales      ' }, { 20, 'Marketing  ' }, { 
30, 'Engineering' }, { 40, 'Empty      ' }]])
+!plan
+
 # End sub-query.iq

Reply via email to