This is an automated email from the ASF dual-hosted git repository.
rubenql pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 7dd3ecd5a6 [CALCITE-5789] Query with two nested subqueries where the
inner-most references the outer-most table returns wrong result [CALCITE-5683]
Two level nested correlated subquery throws an exception during decorrelation
7dd3ecd5a6 is described below
commit 7dd3ecd5a61e876d929a0f4dea6eac41efce7d0c
Author: rubenada <[email protected]>
AuthorDate: Wed Jun 21 10:03:04 2023 +0100
[CALCITE-5789] Query with two nested subqueries where the inner-most
references the outer-most table returns wrong result
[CALCITE-5683] Two level nested correlated subquery throws an exception
during decorrelation
Co-authored-by: HanumathRao <[email protected]>
---
.../calcite/rel/rules/SubQueryRemoveRule.java | 7 +
.../apache/calcite/sql2rel/SqlToRelConverter.java | 12 +-
.../org/apache/calcite/test/RelOptRulesTest.java | 71 +++++++++
.../org/apache/calcite/test/RelOptRulesTest.xml | 160 +++++++++++++++++++++
core/src/test/resources/sql/misc.iq | 19 +++
core/src/test/resources/sql/sub-query.iq | 74 ++++++++++
6 files changed, 342 insertions(+), 1 deletion(-)
diff --git
a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index b0051231c7..46ac70f147 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -850,6 +850,7 @@ public class SubQueryRemoveRule
private static void matchFilter(SubQueryRemoveRule rule,
RelOptRuleCall call) {
final Filter filter = call.rel(0);
+ final Set<CorrelationId> filterVariablesSet = filter.getVariablesSet();
final RelBuilder builder = call.builder();
builder.push(filter.getInput());
int count = 0;
@@ -865,6 +866,12 @@ public class SubQueryRemoveRule
LogicVisitor.find(RelOptUtil.Logic.TRUE, ImmutableList.of(c), e);
final Set<CorrelationId> variablesSet =
RelOptUtil.getVariablesUsed(e.rel);
+ // Filter without variables could be handled before this change, we do
not want
+ // to break it yet for compatibility reason.
+ if (!filterVariablesSet.isEmpty()) {
+ // Only consider the correlated variables which originated from this
sub-query level.
+ variablesSet.retainAll(filterVariablesSet);
+ }
final RexNode target =
rule.apply(e, variablesSet, logic,
builder, 1, builder.peek().getRowType().getFieldCount(), count);
diff --git
a/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java
b/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java
index bea49d401f..6e44e8e764 100644
--- a/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java
+++ b/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java
@@ -3512,7 +3512,17 @@ public class SqlToRelConverter {
// implement HAVING (we have already checked that it is non-trivial)
relBuilder.push(bb.root());
- relBuilder.filter(havingExpr);
+ // Set the correlation variables used in this sub-query to the filter node,
+ // same logic is being used for the filter generated in where clause.
+ Set<CorrelationId> variableSet = new HashSet<>();
+ RexSubQuery subQ = RexUtil.SubQueryFinder.find(havingExpr);
+ if (subQ != null) {
+ CorrelationUse p = getCorrelationUse(bb, subQ.rel);
+ if (p != null) {
+ variableSet.add(p.id);
+ }
+ }
+ relBuilder.filter(variableSet, havingExpr);
// implement the SELECT list
relBuilder.project(projects.leftList(), projects.rightList())
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index d131204cf8..f61185dbe4 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -6977,6 +6977,77 @@ class RelOptRulesTest extends RelOptTestBase {
.checkUnchanged();
}
+ /** Test case for CALCITE-5683 for two level nested decorrelate with
standard program
+ * failing during the decorrelation phase. The correlation variable is used
at two levels
+ * deep. */
+ @Test void testTwoLevelDecorrelate() {
+ final String sql = "SELECT d1.name, d1.deptno + (\n"
+ + "SELECT e1.empno\n"
+ + "FROM emp e1\n"
+ + "WHERE d1.deptno = e1.deptno and\n"
+ + " e1.sal = (SELECT max(sal)\n"
+ + " FROM emp e2\n"
+ + " WHERE e1.sal = e2.sal and\n"
+ + " e1.deptno = e2.deptno and\n"
+ + " d1.deptno < e2.deptno))\n"
+ + "FROM dept d1";
+
+ sql(sql)
+ .withSubQueryRules()
+ .withLateDecorrelate(true)
+ .withTrim(true)
+ .check();
+ }
+
+ /**
+ * Test case that SubQueryRemoveRule works with correlated Filter without
varibles.
+ */
+ @Test void testCorrelatedFilterWithoutVariable() {
+ // select *
+ // from dept
+ // where exists (select deptno
+ // from emp
+ // where dept.deptno = emp.deptno
+ // and emp.sal > 100)
+ final Holder<@Nullable RexCorrelVariable> v = Holder.empty();
+ final Function<RelBuilder, RelNode> relFn = b -> b
+ .scan("DEPT")
+ .variable(v::set)
+ .filter(
+ b.exists(b1 -> b1
+ .scan("EMP")
+ .filter(
+ b1.and(
+ b1.equals(b1.field(v.get(), "DEPTNO"), b1.field("DEPTNO")),
+ b1.greaterThan(b1.field("SAL"), b1.literal(100))))
+ .project(b1.field("DEPTNO"))
+ .build()))
+ .build();
+ relFn(relFn)
+ .withSubQueryRules()
+ .check();
+ }
+
+ /** Test case for CALCITE-5683 for two level nested decorrelate with
standard program
+ * failing during the decorrelation phase. The correlation variable is used
at the second
+ * level and is not used in the first level */
+ @Test void testCorrelatedVariableAtSecondLevel() {
+ final String sql = "SELECT d1.name, d1.deptno +(\n"
+ + "SELECT e1.empno\n"
+ + "FROM emp e1\n"
+ + "WHERE e1.sal = (SELECT max(sal)\n"
+ + " FROM emp e2\n"
+ + " WHERE e1.sal = e2.sal and\n"
+ + " e1.deptno = e2.deptno and\n"
+ + " d1.deptno < e2.deptno))\n"
+ + "FROM dept d1";
+ sql(sql)
+ .withSubQueryRules()
+ .withLateDecorrelate(true)
+ .withTrim(true)
+ .check();
+ }
+
/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-434">[CALCITE-434]
* Converting predicates on date dimension columns into date ranges</a>,
diff --git
a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index ef76d24570..7fb728af65 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -1729,6 +1729,97 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2],
MGR=[$3], HIREDATE=[$4], SAL=[$
<Resource name="planAfter">
<![CDATA[
EnumerableValues(tuples=[[]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testCorrelatedFilterWithoutVariable">
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalFilter(condition=[EXISTS({
+LogicalProject(DEPTNO=[$7])
+ LogicalFilter(condition=[AND(=($cor0.DEPTNO, $7), >($5, 100))])
+ LogicalTableScan(table=[[scott, EMP]])
+})])
+ LogicalTableScan(table=[[scott, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])
+ LogicalCorrelate(correlation=[$cor0], joinType=[inner],
requiredColumns=[{0}])
+ LogicalTableScan(table=[[scott, DEPT]])
+ LogicalAggregate(group=[{0}])
+ LogicalProject(i=[true])
+ LogicalFilter(condition=[AND(=($cor0.DEPTNO, $7), >($5, 100))])
+ LogicalTableScan(table=[[scott, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testCorrelatedVariableAtSecondLevel">
+ <Resource name="sql">
+ <![CDATA[SELECT d1.name, d1.deptno +(
+SELECT e1.empno
+FROM emp e1
+WHERE e1.sal = (SELECT max(sal)
+ FROM emp e2
+ WHERE e1.sal = e2.sal and
+ e1.deptno = e2.deptno and
+ d1.deptno < e2.deptno))
+FROM dept d1]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(NAME=[$1], EXPR$1=[+($0, $SCALAR_QUERY({
+LogicalProject(EMPNO=[$0])
+ LogicalFilter(condition=[=($5, $SCALAR_QUERY({
+LogicalAggregate(group=[{}], EXPR$0=[MAX($0)])
+ LogicalProject(SAL=[$5])
+ LogicalFilter(condition=[AND(=($cor0.SAL, $5), =($cor0.DEPTNO, $7),
<($cor2.DEPTNO, $7))])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+}))], variablesSet=[[$cor0]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+}))])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planMid">
+ <![CDATA[
+LogicalProject(NAME=[$1], EXPR$1=[+($0, $2)])
+ LogicalCorrelate(correlation=[$cor2], joinType=[left], requiredColumns=[{0}])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalAggregate(group=[{}], agg#0=[SINGLE_VALUE($0)])
+ LogicalProject(EMPNO=[$0])
+ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3],
HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+ LogicalFilter(condition=[=($5, $9)])
+ LogicalCorrelate(correlation=[$cor0], joinType=[left],
requiredColumns=[{5, 7}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalAggregate(group=[{}], EXPR$0=[MAX($0)])
+ LogicalProject(SAL=[$5])
+ LogicalFilter(condition=[AND(=($cor0.SAL, $5),
=($cor0.DEPTNO, $7), <($cor2.DEPTNO, $7))])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(NAME=[$1], EXPR$1=[+($0, $3)])
+ LogicalJoin(condition=[=($0, $2)], joinType=[left])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
+ LogicalProject(DEPTNO00=[$11], EMPNO=[$0])
+ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3],
HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8],
SAL0=[CAST($9):INTEGER], DEPTNO0=[CAST($10):INTEGER],
DEPTNO00=[CAST($11):INTEGER], EXPR$0=[CAST($12):INTEGER])
+ LogicalJoin(condition=[AND(=($5, $9), =($7, $10))], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalFilter(condition=[=($0, $3)])
+ LogicalAggregate(group=[{0, 1, 2}], EXPR$0=[MAX($3)])
+ LogicalProject(SAL0=[$9], DEPTNO0=[$10], DEPTNO00=[$11],
SAL=[$5])
+ LogicalJoin(condition=[AND(=($9, $5), =($10, $7), <($11,
$7))], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalJoin(condition=[true], joinType=[inner])
+ LogicalAggregate(group=[{0, 1}])
+ LogicalProject(SAL=[$5], DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(DEPTNO=[$0])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
]]>
</Resource>
</TestCase>
@@ -14940,6 +15031,75 @@ LogicalProject(DEPTNO=[$0], DEPTNO0=[$1])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
LogicalProject(DEPTNO=[$7])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ </TestCase>
+ <TestCase name="testTwoLevelDecorrelate">
+ <Resource name="sql">
+ <![CDATA[SELECT d1.name, d1.deptno + (
+SELECT e1.empno
+FROM emp e1
+WHERE d1.deptno = e1.deptno and
+ e1.sal = (SELECT max(sal)
+ FROM emp e2
+ WHERE e1.sal = e2.sal and
+ e1.deptno = e2.deptno and
+ d1.deptno < e2.deptno))
+FROM dept d1]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(NAME=[$1], EXPR$1=[+($0, $SCALAR_QUERY({
+LogicalProject(EMPNO=[$0])
+ LogicalFilter(condition=[AND(=($cor0.DEPTNO, $7), =($5, $SCALAR_QUERY({
+LogicalAggregate(group=[{}], EXPR$0=[MAX($0)])
+ LogicalProject(SAL=[$5])
+ LogicalFilter(condition=[AND(=($cor1.SAL, $5), =($cor1.DEPTNO, $7),
<($cor0.DEPTNO, $7))])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+})))], variablesSet=[[$cor1]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+}))])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planMid">
+ <![CDATA[
+LogicalProject(NAME=[$1], EXPR$1=[+($0, $2)])
+ LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalAggregate(group=[{}], agg#0=[SINGLE_VALUE($0)])
+ LogicalProject(EMPNO=[$0])
+ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3],
HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+ LogicalFilter(condition=[AND(=($cor0.DEPTNO, $7), =($5, $9))])
+ LogicalCorrelate(correlation=[$cor1], joinType=[left],
requiredColumns=[{5, 7}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalAggregate(group=[{}], EXPR$0=[MAX($0)])
+ LogicalProject(SAL=[$5])
+ LogicalFilter(condition=[AND(=($cor1.SAL, $5),
=($cor1.DEPTNO, $7), <($cor0.DEPTNO, $7))])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(NAME=[$1], EXPR$1=[+($0, $3)])
+ LogicalJoin(condition=[=($0, $2)], joinType=[left])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalAggregate(group=[{0}], agg#0=[SINGLE_VALUE($1)])
+ LogicalProject(DEPTNO0=[$10], EMPNO=[$0])
+ LogicalFilter(condition=[AND(=($10, $7), =($5, $9))])
+ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3],
HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], EXPR$0=[$12],
DEPTNO0=[$9])
+ LogicalJoin(condition=[AND(=($5, $10), =($7, $11))],
joinType=[left])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalAggregate(group=[{0, 1, 2}], EXPR$0=[MAX($3)])
+ LogicalProject(DEPTNO0=[$9], SAL0=[$10], DEPTNO00=[$11],
SAL=[$5])
+ LogicalJoin(condition=[AND(=($10, $5), =($11, $7), <($9,
$7))], joinType=[inner])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalJoin(condition=[true], joinType=[inner])
+ LogicalProject(DEPTNO=[$0])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+ LogicalAggregate(group=[{0, 1}])
+ LogicalProject(SAL=[$5], DEPTNO=[$7])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
diff --git a/core/src/test/resources/sql/misc.iq
b/core/src/test/resources/sql/misc.iq
index 64554c0556..c4419ee456 100644
--- a/core/src/test/resources/sql/misc.iq
+++ b/core/src/test/resources/sql/misc.iq
@@ -777,6 +777,25 @@ having exists
!ok
+# Having with correlation anded with normal condition.
+with src (key, "value")
+ as (select * from (values (1, 'a'), (2, 'z')) as t(key, "value"))
+select b.key, count(*) as c
+from src b
+group by b.key
+having exists
+ (select a.key
+ from src a
+ where a.key = b.key and a."value" > 'val_9') and b.key > 0;
++-----+---+
+| KEY | C |
++-----+---+
+| 2 | 1 |
++-----+---+
+(1 row)
+
+!ok
+
# [CALCITE-411] Duplicate aliases
select 1 as a, 2 as a from (values (true));
+---+---+
diff --git a/core/src/test/resources/sql/sub-query.iq
b/core/src/test/resources/sql/sub-query.iq
index 3cabe1bdf1..f704fb41e5 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -3611,4 +3611,78 @@ from emp;
!ok
+# Test case for [CALCITE-5789]
+select deptno from dept d1 where exists (
+ select 1 from dept d2 where d2.deptno = d1.deptno and exists (
+ select 1 from dept d3 where d3.deptno = d2.deptno and d3.dname = d1.dname));
++--------+
+| DEPTNO |
++--------+
+| 10 |
+| 20 |
+| 30 |
+| 40 |
++--------+
+(4 rows)
+
+!ok
+EnumerableCalc(expr#0..3=[{inputs}], DEPTNO=[$t2])
+ EnumerableHashJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[inner])
+ EnumerableCalc(expr#0..2=[{inputs}], proj#0..1=[{exprs}])
+ EnumerableMergeJoin(condition=[=($0, $2)], joinType=[inner])
+ EnumerableCalc(expr#0..2=[{inputs}], DEPTNO=[$t0])
+ EnumerableTableScan(table=[[scott, DEPT]])
+ EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t1)],
DNAME=[$t1], DEPTNO=[$t0], $condition=[$t3])
+ EnumerableTableScan(table=[[scott, DEPT]])
+ EnumerableCalc(expr#0..2=[{inputs}], proj#0..1=[{exprs}])
+ EnumerableTableScan(table=[[scott, DEPT]])
+!plan
+
+# Test case for CALCITE-5683 which throws an exception during the
de-correlation phase
+SELECT d1.dname, d1.deptno + (
+ SELECT max(e1.empno)
+ FROM emp e1
+ WHERE d1.deptno = e1.deptno and
+ e1.sal = (
+ SELECT max(sal)
+ FROM emp e2
+ WHERE e1.sal = e2.sal and
+ e1.deptno = e2.deptno and
+ d1.deptno <= e2.deptno))
+FROM dept d1;
++------------+--------+
+| DNAME | EXPR$1 |
++------------+--------+
+| ACCOUNTING | 7944 |
+| OPERATIONS | |
+| RESEARCH | 7922 |
+| SALES | 7930 |
++------------+--------+
+(4 rows)
+
+!ok
+
+# Test case for CALCITE-5683 which throws an exception during the
de-correlation phase
+SELECT d1.dname, d1.deptno + (
+ SELECT max(e1.empno)
+ FROM emp e1
+ WHERE d1.deptno = e1.deptno and
+ e1.sal = (SELECT max(sal)
+ FROM emp e2
+ WHERE e1.sal = e2.sal and
+ e1.deptno = e2.deptno and
+ d1.deptno < e2.deptno))
+FROM dept d1;
++------------+--------+
+| DNAME | EXPR$1 |
++------------+--------+
+| ACCOUNTING | |
+| OPERATIONS | |
+| RESEARCH | |
+| SALES | |
++------------+--------+
+(4 rows)
+
+!ok
+
# End sub-query.iq