This is an automated email from the ASF dual-hosted git repository.
xiong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new f347fba2d2 [CALCITE-6650] Optimize the IN sub-query and SOME sub-query
by Metadata RowCount
f347fba2d2 is described below
commit f347fba2d25d62c98fe3bb5f28bc9b508f7ca017
Author: Xiong Duan <[email protected]>
AuthorDate: Tue Oct 29 18:52:20 2024 +0800
[CALCITE-6650] Optimize the IN sub-query and SOME sub-query by Metadata
RowCount
---
.../calcite/rel/rules/SubQueryRemoveRule.java | 18 ++-
.../org/apache/calcite/test/RelOptRulesTest.java | 46 ++++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 144 ++++++++++++++++++
core/src/test/resources/sql/sub-query.iq | 163 ++++++++++++++++++++-
4 files changed, 368 insertions(+), 3 deletions(-)
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index 6feee83c76..3e9473f0b6 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -170,6 +170,13 @@ public class SubQueryRemoveRule
*/
private static RexNode rewriteSome(RexSubQuery e, Set<CorrelationId>
variablesSet,
RelBuilder builder, int subQueryIndex) {
+ // If the sub-query is guaranteed to return 0 row, just return
+ // FALSE.
+ final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery();
+ final Double maxRowCount = mq.getMaxRowCount(e.rel);
+ if (maxRowCount != null && maxRowCount <= 0D) {
+ return builder.getRexBuilder().makeLiteral(Boolean.FALSE, e.getType(),
true);
+ }
// Most general case, where the left and right keys might have nulls, and
// caller requires 3-valued logic return.
//
@@ -459,11 +466,11 @@ public class SubQueryRemoveRule
// TRUE.
final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery();
final Double minRowCount = mq.getMinRowCount(e.rel);
- if (minRowCount != null && minRowCount >= 1D) {
+ if (minRowCount != null && minRowCount > 0D) {
return builder.literal(true);
}
final Double maxRowCount = mq.getMaxRowCount(e.rel);
- if (maxRowCount != null && maxRowCount < 1D) {
+ if (maxRowCount != null && maxRowCount <= 0D) {
return builder.literal(false);
}
builder.push(e.rel);
@@ -555,6 +562,13 @@ public class SubQueryRemoveRule
*/
private static RexNode rewriteIn(RexSubQuery e, Set<CorrelationId>
variablesSet,
RelOptUtil.Logic logic, RelBuilder builder, int offset, int
subQueryIndex) {
+ // If the sub-query is guaranteed to return 0 row, just return
+ // FALSE.
+ final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery();
+ final Double maxRowCount = mq.getMaxRowCount(e.rel);
+ if (maxRowCount != null && maxRowCount <= 0D) {
+ return builder.getRexBuilder().makeLiteral(Boolean.FALSE, e.getType(),
true);
+ }
// Most general case, where the left and right keys might have nulls, and
// caller requires 3-valued logic return.
//
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index 0454021eee..5fe12d9617 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -5049,6 +5049,52 @@ class RelOptRulesTest extends RelOptTestBase {
sql(sql).withSubQueryRules().check();
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-6650">[CALCITE-6650]
+ * Optimize the IN sub-query and SOME sub-query
+ * by Metadata RowCount</a>. */
+ @Test void testInWithNoRowSubQuery() {
+ final String sql = "select * from dept as d\n"
+ + "where deptno in (\n"
+ + " select deptno from emp e where false)";
+ sql(sql).withSubQueryRules().check();
+ }
+
+ @Test void testNotInWithNoRowSubQuery() {
+ final String sql = "select * from dept as d\n"
+ + "where deptno not in (\n"
+ + " select deptno from emp e where false)";
+ sql(sql).withSubQueryRules().check();
+ }
+
+ @Test void testSomeWithGreaterThanNoRowSubQuery() {
+ final String sql = "select * from dept as d\n"
+ + "where deptno > some(\n"
+ + " select deptno from emp e where false)";
+ sql(sql).withSubQueryRules().check();
+ }
+
+ @Test void testSomeWithLessThanOrEqualNoRowSubQuery() {
+ final String sql = "select * from dept as d\n"
+ + "where deptno <= some(\n"
+ + " select deptno from emp e where false)";
+ sql(sql).withSubQueryRules().check();
+ }
+
+ @Test void testUniqueWithNoRowSubQuery() {
+ final String sql = "select * from dept as d\n"
+ + "where unique(\n"
+ + " select deptno from emp e where false)";
+ sql(sql).withSubQueryRules().check();
+ }
+
+ @Test void testNotUniqueWithNoRowSubQuery() {
+ final String sql = "select * from dept as d\n"
+ + "where not unique(\n"
+ + " select deptno from emp e where false)";
+ sql(sql).withSubQueryRules().check();
+ }
+
/** Test case for
* <a
href="https://issues.apache.org/jira/browse/CALCITE-4848">[CALCITE-4848]
* Adding a HAVING condition to a query with a dynamic parameter makes the
result always empty
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index dacda972b1..eba397a3b9 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -5081,6 +5081,30 @@ LogicalUnion(all=[true])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(EXPR$0=[LOWER($1)])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testInWithNoRowSubQuery">
+ <Resource name="sql">
+ <![CDATA[select * from dept as d
+where deptno in (
+ select deptno from emp e where false)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalFilter(condition=[IN($0, {
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[false])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalValues(tuples=[[]])
]]>
</Resource>
</TestCase>
@@ -7724,6 +7748,30 @@ LogicalProject(USER=[USER])
LogicalAggregate(group=[{0}], EXPR$1=[SUM($1)])
LogicalProject(NAME=[$1], DEPTNO=[$0])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testNotInWithNoRowSubQuery">
+ <Resource name="sql">
+ <![CDATA[select * from dept as d
+where deptno not in (
+ select deptno from emp e where false)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalFilter(condition=[NOT(IN($0, {
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[false])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+}))])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
@@ -7762,6 +7810,30 @@ LogicalProject(NAME=[$1], DEPTNO=[$0], EXPR$2=[$2])
LogicalFilter(condition=[=($0, 0)])
LogicalWindow(window#0=[window(partition {1} aggs [COUNT()])])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testNotUniqueWithNoRowSubQuery">
+ <Resource name="sql">
+ <![CDATA[select * from dept as d
+where not unique(
+ select deptno from emp e where false)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalFilter(condition=[NOT(UNIQUE({
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[false])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+}))])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalValues(tuples=[[]])
]]>
</Resource>
</TestCase>
@@ -14508,6 +14580,54 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2],
MGR=[$3], HIREDATE=[$4], SAL=[$
LogicalAggregate(group=[{0}])
LogicalProject(NAME=[$1])
LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSomeWithGreaterThanNoRowSubQuery">
+ <Resource name="sql">
+ <![CDATA[select * from dept as d
+where deptno > some(
+ select deptno from emp e where false)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalFilter(condition=[> SOME($0, {
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[false])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalValues(tuples=[[]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testSomeWithLessThanOrEqualNoRowSubQuery">
+ <Resource name="sql">
+ <![CDATA[select * from dept as d
+where deptno <= some(
+ select deptno from emp e where false)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalFilter(condition=[<= SOME($0, {
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[false])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalValues(tuples=[[]])
]]>
</Resource>
</TestCase>
@@ -16888,6 +17008,30 @@ LogicalUnion(all=[true])
<Resource name="planAfter">
<![CDATA[
LogicalValues(tuples=[[{ 5 }, { 5 }]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testUniqueWithNoRowSubQuery">
+ <Resource name="sql">
+ <![CDATA[select * from dept as d
+where unique(
+ select deptno from emp e where false)]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalFilter(condition=[UNIQUE({
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[false])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], NAME=[$1])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
diff --git a/core/src/test/resources/sql/sub-query.iq
b/core/src/test/resources/sql/sub-query.iq
index 382ad91ebd..f1adf623ad 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -163,7 +163,10 @@ select * from dept where deptno not in (select deptno from
emp where false);
(4 rows)
!ok
-select deptno, deptno in (select deptno from emp where false) from dept;
+EnumerableValues(tuples=[[{ 10, 'Sales ' }, { 20, 'Marketing ' }, { 30,
'Engineering' }, { 40, 'Empty ' }]])
+!plan
+
+select deptno, deptno in (select deptno from emp where false) from dept;
+--------+--------+
| DEPTNO | EXPR$1 |
+--------+--------+
@@ -175,6 +178,10 @@ select deptno, deptno in (select deptno from emp where
false) from dept;
(4 rows)
!ok
+EnumerableCalc(expr#0..1=[{inputs}], expr#2=[false],
expr#3=[CAST($t2):BOOLEAN], DEPTNO=[$t0], EXPR$1=[$t3])
+ EnumerableValues(tuples=[[{ 10, 'Sales ' }, { 20, 'Marketing ' }, {
30, 'Engineering' }, { 40, 'Empty ' }]])
+!plan
+
select deptno, deptno not in (select deptno from emp where false) from dept;
+--------+--------+
| DEPTNO | EXPR$1 |
@@ -4238,4 +4245,158 @@ EnumerableCalc(expr#0..8=[{inputs}],
proj#0..7=[{exprs}])
EnumerableTableScan(table=[[scott, EMP]])
!plan
+
+# [CALCITE-6650] Optimize the IN sub-query and SOME sub-query by Metadata
RowCount
+
+!use post
+
+# Test case about SOME sub-query when sub-query return 0 row
+select * from emp where deptno > some(select deptno from dept where false);
++-------+--------+--------+
+| ENAME | DEPTNO | GENDER |
++-------+--------+--------+
++-------+--------+--------+
+(0 rows)
+
+!ok
+
+EnumerableValues(tuples=[[]])
+!plan
+
+# Same as previous; but is Scalar sub-query
+select deptno, deptno > some(select deptno from dept where false) from emp;
++--------+--------+
+| DEPTNO | EXPR$1 |
++--------+--------+
+| 10 | false |
+| 20 | false |
+| 30 | false |
+| 10 | false |
+| 30 | false |
+| 50 | false |
+| 50 | false |
+| 60 | false |
+| | false |
++--------+--------+
+(9 rows)
+
+!ok
+
+EnumerableCalc(expr#0=[{inputs}], expr#1=[false], expr#2=[CAST($t1):BOOLEAN],
DEPTNO=[$t0], EXPR$1=[$t2])
+ EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, {
50 }, { 60 }, { null }]])
+!plan
+
+# Test case about ANY sub-query when sub-query return 0 row
+select * from emp where deptno > any(select deptno from dept where false);
++-------+--------+--------+
+| ENAME | DEPTNO | GENDER |
++-------+--------+--------+
++-------+--------+--------+
+(0 rows)
+
+!ok
+
+EnumerableValues(tuples=[[]])
+!plan
+
+# Same as previous; but is Scalar sub-query
+select deptno, deptno > any(select deptno from dept where false) from emp;
++--------+--------+
+| DEPTNO | EXPR$1 |
++--------+--------+
+| 10 | false |
+| 10 | false |
+| 20 | false |
+| 30 | false |
+| 30 | false |
+| 50 | false |
+| 50 | false |
+| 60 | false |
+| | false |
++--------+--------+
+(9 rows)
+
+!ok
+
+EnumerableCalc(expr#0=[{inputs}], expr#1=[false], expr#2=[CAST($t1):BOOLEAN],
DEPTNO=[$t0], EXPR$1=[$t2])
+ EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, {
50 }, { 60 }, { null }]])
+!plan
+
+# Test case about UNIQUE sub-query when sub-query return 0 row
+select * from emp where unique (select deptno from dept where false);
++-------+--------+--------+
+| ENAME | DEPTNO | GENDER |
++-------+--------+--------+
+| Adam | 50 | M |
+| Alice | 30 | F |
+| Bob | 10 | M |
+| Eric | 20 | M |
+| Eve | 50 | F |
+| Grace | 60 | F |
+| Jane | 10 | F |
+| Susan | 30 | F |
+| Wilma | | F |
++-------+--------+--------+
+(9 rows)
+
+!ok
+
+EnumerableValues(tuples=[[{ 'Jane ', 10, 'F' }, { 'Bob ', 10, 'M' }, { 'Eric
', 20, 'M' }, { 'Susan', 30, 'F' }, { 'Alice', 30, 'F' }, { 'Adam ', 50, 'M' },
{ 'Eve ', 50, 'F' }, { 'Grace', 60, 'F' }, { 'Wilma', null, 'F' }]])
+!plan
+
+# Same as previous; but is Scalar sub-query
+select unique (select deptno from dept where false) from emp;
++--------+
+| EXPR$0 |
++--------+
+| true |
+| true |
+| true |
+| true |
+| true |
+| true |
+| true |
+| true |
+| true |
++--------+
+(9 rows)
+
+!ok
+
+EnumerableCalc(expr#0=[{inputs}], expr#1=[true], EXPR$0=[$t1])
+ EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, {
50 }, { 60 }, { null }]])
+!plan
+
+
+# Test case about NOT UNIQUE sub-query when sub-query return 0 row
+select * from emp where not unique (select deptno from dept where false);
++-------+--------+--------+
+| ENAME | DEPTNO | GENDER |
++-------+--------+--------+
++-------+--------+--------+
+(0 rows)
+
+!ok
+
+EnumerableValues(tuples=[[]])
+!plan
+
+# Same as previous; but is Scalar sub-query
+select not unique (select deptno from dept where false) from dept;
++--------+
+| EXPR$0 |
++--------+
+| false |
+| false |
+| false |
+| false |
++--------+
+(4 rows)
+
+!ok
+
+EnumerableCalc(expr#0..1=[{inputs}], expr#2=[false], EXPR$0=[$t2])
+ EnumerableValues(tuples=[[{ 10, 'Sales ' }, { 20, 'Marketing ' }, {
30, 'Engineering' }, { 40, 'Empty ' }]])
+!plan
+
# End sub-query.iq