This is an automated email from the ASF dual-hosted git repository. rubenql pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new 7dd3ecd5a6 [CALCITE-5789] Query with two nested subqueries where the inner-most references the outer-most table returns wrong result [CALCITE-5683] Two level nested correlated subquery throws an exception during decorrelation 7dd3ecd5a6 is described below commit 7dd3ecd5a61e876d929a0f4dea6eac41efce7d0c Author: rubenada <rube...@gmail.com> AuthorDate: Wed Jun 21 10:03:04 2023 +0100 [CALCITE-5789] Query with two nested subqueries where the inner-most references the outer-most table returns wrong result [CALCITE-5683] Two level nested correlated subquery throws an exception during decorrelation Co-authored-by: HanumathRao <hanu....@gmail.com> --- .../calcite/rel/rules/SubQueryRemoveRule.java | 7 + .../apache/calcite/sql2rel/SqlToRelConverter.java | 12 +- .../org/apache/calcite/test/RelOptRulesTest.java | 71 +++++++++ .../org/apache/calcite/test/RelOptRulesTest.xml | 160 +++++++++++++++++++++ core/src/test/resources/sql/misc.iq | 19 +++ core/src/test/resources/sql/sub-query.iq | 74 ++++++++++ 6 files changed, 342 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java index b0051231c7..46ac70f147 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java @@ -850,6 +850,7 @@ public class SubQueryRemoveRule private static void matchFilter(SubQueryRemoveRule rule, RelOptRuleCall call) { final Filter filter = call.rel(0); + final Set<CorrelationId> filterVariablesSet = filter.getVariablesSet(); final RelBuilder builder = call.builder(); builder.push(filter.getInput()); int count = 0; @@ -865,6 +866,12 @@ public class SubQueryRemoveRule LogicVisitor.find(RelOptUtil.Logic.TRUE, ImmutableList.of(c), e); final Set<CorrelationId> variablesSet = RelOptUtil.getVariablesUsed(e.rel); + // Filter without variables could be handled before this change, we do not want + // to break it yet for compatibility reason. + if (!filterVariablesSet.isEmpty()) { + // Only consider the correlated variables which originated from this sub-query level. + variablesSet.retainAll(filterVariablesSet); + } final RexNode target = rule.apply(e, variablesSet, logic, builder, 1, builder.peek().getRowType().getFieldCount(), count); diff --git a/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java b/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java index bea49d401f..6e44e8e764 100644 --- a/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java +++ b/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java @@ -3512,7 +3512,17 @@ public class SqlToRelConverter { // implement HAVING (we have already checked that it is non-trivial) relBuilder.push(bb.root()); - relBuilder.filter(havingExpr); + // Set the correlation variables used in this sub-query to the filter node, + // same logic is being used for the filter generated in where clause. + Set<CorrelationId> variableSet = new HashSet<>(); + RexSubQuery subQ = RexUtil.SubQueryFinder.find(havingExpr); + if (subQ != null) { + CorrelationUse p = getCorrelationUse(bb, subQ.rel); + if (p != null) { + variableSet.add(p.id); + } + } + relBuilder.filter(variableSet, havingExpr); // implement the SELECT list relBuilder.project(projects.leftList(), projects.rightList()) diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java index d131204cf8..f61185dbe4 100644 --- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java @@ -6977,6 +6977,77 @@ class RelOptRulesTest extends RelOptTestBase { .checkUnchanged(); } + /** Test case for CALCITE-5683 for two level nested decorrelate with standard program + * failing during the decorrelation phase. The correlation variable is used at two levels + * deep. */ + @Test void testTwoLevelDecorrelate() { + final String sql = "SELECT d1.name, d1.deptno + (\n" + + "SELECT e1.empno\n" + + "FROM emp e1\n" + + "WHERE d1.deptno = e1.deptno and\n" + + " e1.sal = (SELECT max(sal)\n" + + " FROM emp e2\n" + + " WHERE e1.sal = e2.sal and\n" + + " e1.deptno = e2.deptno and\n" + + " d1.deptno < e2.deptno))\n" + + "FROM dept d1"; + + sql(sql) + .withSubQueryRules() + .withLateDecorrelate(true) + .withTrim(true) + .check(); + } + + /** + * Test case that SubQueryRemoveRule works with correlated Filter without varibles. + */ + @Test void testCorrelatedFilterWithoutVariable() { + // select * + // from dept + // where exists (select deptno + // from emp + // where dept.deptno = emp.deptno + // and emp.sal > 100) + final Holder<@Nullable RexCorrelVariable> v = Holder.empty(); + final Function<RelBuilder, RelNode> relFn = b -> b + .scan("DEPT") + .variable(v::set) + .filter( + b.exists(b1 -> b1 + .scan("EMP") + .filter( + b1.and( + b1.equals(b1.field(v.get(), "DEPTNO"), b1.field("DEPTNO")), + b1.greaterThan(b1.field("SAL"), b1.literal(100)))) + .project(b1.field("DEPTNO")) + .build())) + .build(); + relFn(relFn) + .withSubQueryRules() + .check(); + } + + /** Test case for CALCITE-5683 for two level nested decorrelate with standard program + * failing during the decorrelation phase. The correlation variable is used at the second + * level and is not used in the first level */ + @Test void testCorrelatedVariableAtSecondLevel() { + final String sql = "SELECT d1.name, d1.deptno +(\n" + + "SELECT e1.empno\n" + + "FROM emp e1\n" + + "WHERE e1.sal = (SELECT max(sal)\n" + + " FROM emp e2\n" + + " WHERE e1.sal = e2.sal and\n" + + " e1.deptno = e2.deptno and\n" + + " d1.deptno < e2.deptno))\n" + + "FROM dept d1"; + sql(sql) + .withSubQueryRules() + .withLateDecorrelate(true) + .withTrim(true) + .check(); + } + /** Test case for * <a href="https://issues.apache.org/jira/browse/CALCITE-434">[CALCITE-434] * Converting predicates on date dimension columns into date ranges</a>, diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml index ef76d24570..7fb728af65 100644 --- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml @@ -1729,6 +1729,97 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$ <Resource name="planAfter"> <![CDATA[ EnumerableValues(tuples=[[]]) +]]> + </Resource> + </TestCase> + <TestCase name="testCorrelatedFilterWithoutVariable"> + <Resource name="planBefore"> + <![CDATA[ +LogicalFilter(condition=[EXISTS({ +LogicalProject(DEPTNO=[$7]) + LogicalFilter(condition=[AND(=($cor0.DEPTNO, $7), >($5, 100))]) + LogicalTableScan(table=[[scott, EMP]]) +})]) + LogicalTableScan(table=[[scott, DEPT]]) +]]> + </Resource> + <Resource name="planAfter"> + <![CDATA[ +LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2]) + LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{0}]) + LogicalTableScan(table=[[scott, DEPT]]) + LogicalAggregate(group=[{0}]) + LogicalProject(i=[true]) + LogicalFilter(condition=[AND(=($cor0.DEPTNO, $7), >($5, 100))]) + LogicalTableScan(table=[[scott, EMP]]) +]]> + </Resource> + </TestCase> + <TestCase name="testCorrelatedVariableAtSecondLevel"> + <Resource name="sql"> + <![CDATA[SELECT d1.name, d1.deptno +( +SELECT e1.empno +FROM emp e1 +WHERE e1.sal = (SELECT max(sal) + FROM emp e2 + WHERE e1.sal = e2.sal and + e1.deptno = e2.deptno and + d1.deptno < e2.deptno)) +FROM dept d1]]> + </Resource> + <Resource name="planBefore"> + <![CDATA[ +LogicalProject(NAME=[$1], EXPR$1=[+($0, $SCALAR_QUERY({ +LogicalProject(EMPNO=[$0]) + LogicalFilter(condition=[=($5, $SCALAR_QUERY({ +LogicalAggregate(group=[{}], EXPR$0=[MAX($0)]) + LogicalProject(SAL=[$5]) + LogicalFilter(condition=[AND(=($cor0.SAL, $5), =($cor0.DEPTNO, $7), <($cor2.DEPTNO, $7))]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +}))], variablesSet=[[$cor0]]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +}))]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + </Resource> + <Resource name="planMid"> + <![CDATA[ +LogicalProject(NAME=[$1], EXPR$1=[+($0, $2)]) + LogicalCorrelate(correlation=[$cor2], joinType=[left], requiredColumns=[{0}]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + LogicalAggregate(group=[{}], agg#0=[SINGLE_VALUE($0)]) + LogicalProject(EMPNO=[$0]) + LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8]) + LogicalFilter(condition=[=($5, $9)]) + LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{5, 7}]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + LogicalAggregate(group=[{}], EXPR$0=[MAX($0)]) + LogicalProject(SAL=[$5]) + LogicalFilter(condition=[AND(=($cor0.SAL, $5), =($cor0.DEPTNO, $7), <($cor2.DEPTNO, $7))]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +]]> + </Resource> + <Resource name="planAfter"> + <![CDATA[ +LogicalProject(NAME=[$1], EXPR$1=[+($0, $3)]) + LogicalJoin(condition=[=($0, $2)], joinType=[left]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)]) + LogicalProject(DEPTNO00=[$11], EMPNO=[$0]) + LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], SAL0=[CAST($9):INTEGER], DEPTNO0=[CAST($10):INTEGER], DEPTNO00=[CAST($11):INTEGER], EXPR$0=[CAST($12):INTEGER]) + LogicalJoin(condition=[AND(=($5, $9), =($7, $10))], joinType=[inner]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + LogicalFilter(condition=[=($0, $3)]) + LogicalAggregate(group=[{0, 1, 2}], EXPR$0=[MAX($3)]) + LogicalProject(SAL0=[$9], DEPTNO0=[$10], DEPTNO00=[$11], SAL=[$5]) + LogicalJoin(condition=[AND(=($9, $5), =($10, $7), <($11, $7))], joinType=[inner]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + LogicalJoin(condition=[true], joinType=[inner]) + LogicalAggregate(group=[{0, 1}]) + LogicalProject(SAL=[$5], DEPTNO=[$7]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + LogicalProject(DEPTNO=[$0]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) ]]> </Resource> </TestCase> @@ -14940,6 +15031,75 @@ LogicalProject(DEPTNO=[$0], DEPTNO0=[$1]) LogicalTableScan(table=[[CATALOG, SALES, EMP]]) LogicalProject(DEPTNO=[$7]) LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +]]> + </Resource> + </TestCase> + <TestCase name="testTwoLevelDecorrelate"> + <Resource name="sql"> + <![CDATA[SELECT d1.name, d1.deptno + ( +SELECT e1.empno +FROM emp e1 +WHERE d1.deptno = e1.deptno and + e1.sal = (SELECT max(sal) + FROM emp e2 + WHERE e1.sal = e2.sal and + e1.deptno = e2.deptno and + d1.deptno < e2.deptno)) +FROM dept d1]]> + </Resource> + <Resource name="planBefore"> + <![CDATA[ +LogicalProject(NAME=[$1], EXPR$1=[+($0, $SCALAR_QUERY({ +LogicalProject(EMPNO=[$0]) + LogicalFilter(condition=[AND(=($cor0.DEPTNO, $7), =($5, $SCALAR_QUERY({ +LogicalAggregate(group=[{}], EXPR$0=[MAX($0)]) + LogicalProject(SAL=[$5]) + LogicalFilter(condition=[AND(=($cor1.SAL, $5), =($cor1.DEPTNO, $7), <($cor0.DEPTNO, $7))]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +})))], variablesSet=[[$cor1]]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +}))]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + </Resource> + <Resource name="planMid"> + <![CDATA[ +LogicalProject(NAME=[$1], EXPR$1=[+($0, $2)]) + LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + LogicalAggregate(group=[{}], agg#0=[SINGLE_VALUE($0)]) + LogicalProject(EMPNO=[$0]) + LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8]) + LogicalFilter(condition=[AND(=($cor0.DEPTNO, $7), =($5, $9))]) + LogicalCorrelate(correlation=[$cor1], joinType=[left], requiredColumns=[{5, 7}]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + LogicalAggregate(group=[{}], EXPR$0=[MAX($0)]) + LogicalProject(SAL=[$5]) + LogicalFilter(condition=[AND(=($cor1.SAL, $5), =($cor1.DEPTNO, $7), <($cor0.DEPTNO, $7))]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +]]> + </Resource> + <Resource name="planAfter"> + <![CDATA[ +LogicalProject(NAME=[$1], EXPR$1=[+($0, $3)]) + LogicalJoin(condition=[=($0, $2)], joinType=[left]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)]) + LogicalProject(DEPTNO0=[$10], EMPNO=[$0]) + LogicalFilter(condition=[AND(=($10, $7), =($5, $9))]) + LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], EXPR$0=[$12], DEPTNO0=[$9]) + LogicalJoin(condition=[AND(=($5, $10), =($7, $11))], joinType=[left]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + LogicalAggregate(group=[{0, 1, 2}], EXPR$0=[MAX($3)]) + LogicalProject(DEPTNO0=[$9], SAL0=[$10], DEPTNO00=[$11], SAL=[$5]) + LogicalJoin(condition=[AND(=($10, $5), =($11, $7), <($9, $7))], joinType=[inner]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) + LogicalJoin(condition=[true], joinType=[inner]) + LogicalProject(DEPTNO=[$0]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) + LogicalAggregate(group=[{0, 1}]) + LogicalProject(SAL=[$5], DEPTNO=[$7]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) ]]> </Resource> </TestCase> diff --git a/core/src/test/resources/sql/misc.iq b/core/src/test/resources/sql/misc.iq index 64554c0556..c4419ee456 100644 --- a/core/src/test/resources/sql/misc.iq +++ b/core/src/test/resources/sql/misc.iq @@ -777,6 +777,25 @@ having exists !ok +# Having with correlation anded with normal condition. +with src (key, "value") + as (select * from (values (1, 'a'), (2, 'z')) as t(key, "value")) +select b.key, count(*) as c +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a."value" > 'val_9') and b.key > 0; ++-----+---+ +| KEY | C | ++-----+---+ +| 2 | 1 | ++-----+---+ +(1 row) + +!ok + # [CALCITE-411] Duplicate aliases select 1 as a, 2 as a from (values (true)); +---+---+ diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq index 3cabe1bdf1..f704fb41e5 100644 --- a/core/src/test/resources/sql/sub-query.iq +++ b/core/src/test/resources/sql/sub-query.iq @@ -3611,4 +3611,78 @@ from emp; !ok +# Test case for [CALCITE-5789] +select deptno from dept d1 where exists ( + select 1 from dept d2 where d2.deptno = d1.deptno and exists ( + select 1 from dept d3 where d3.deptno = d2.deptno and d3.dname = d1.dname)); ++--------+ +| DEPTNO | ++--------+ +| 10 | +| 20 | +| 30 | +| 40 | ++--------+ +(4 rows) + +!ok +EnumerableCalc(expr#0..3=[{inputs}], DEPTNO=[$t2]) + EnumerableHashJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[inner]) + EnumerableCalc(expr#0..2=[{inputs}], proj#0..1=[{exprs}]) + EnumerableMergeJoin(condition=[=($0, $2)], joinType=[inner]) + EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0]) + EnumerableTableScan(table=[[scott, DEPT]]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t1)], DNAME=[$t1], DEPTNO=[$t0], $condition=[$t3]) + EnumerableTableScan(table=[[scott, DEPT]]) + EnumerableCalc(expr#0..2=[{inputs}], proj#0..1=[{exprs}]) + EnumerableTableScan(table=[[scott, DEPT]]) +!plan + +# Test case for CALCITE-5683 which throws an exception during the de-correlation phase +SELECT d1.dname, d1.deptno + ( + SELECT max(e1.empno) + FROM emp e1 + WHERE d1.deptno = e1.deptno and + e1.sal = ( + SELECT max(sal) + FROM emp e2 + WHERE e1.sal = e2.sal and + e1.deptno = e2.deptno and + d1.deptno <= e2.deptno)) +FROM dept d1; ++------------+--------+ +| DNAME | EXPR$1 | ++------------+--------+ +| ACCOUNTING | 7944 | +| OPERATIONS | | +| RESEARCH | 7922 | +| SALES | 7930 | ++------------+--------+ +(4 rows) + +!ok + +# Test case for CALCITE-5683 which throws an exception during the de-correlation phase +SELECT d1.dname, d1.deptno + ( + SELECT max(e1.empno) + FROM emp e1 + WHERE d1.deptno = e1.deptno and + e1.sal = (SELECT max(sal) + FROM emp e2 + WHERE e1.sal = e2.sal and + e1.deptno = e2.deptno and + d1.deptno < e2.deptno)) +FROM dept d1; ++------------+--------+ +| DNAME | EXPR$1 | ++------------+--------+ +| ACCOUNTING | | +| OPERATIONS | | +| RESEARCH | | +| SALES | | ++------------+--------+ +(4 rows) + +!ok + # End sub-query.iq