This is an automated email from the ASF dual-hosted git repository. dmsysolyatin pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new d4b7342526 [CALCITE-5590] NullPointerException when converting 'in' expression that is used inside select list and group by d4b7342526 is described below commit d4b7342526ad52fcd4016b854322bc03e854bec4 Author: Dmitry Sysolyatin <dmsysolya...@gmail.com> AuthorDate: Thu Nov 14 18:11:01 2024 +0200 [CALCITE-5590] NullPointerException when converting 'in' expression that is used inside select list and group by --- .../org/apache/calcite/sql2rel/AggConverter.java | 2 +- .../apache/calcite/sql2rel/SqlToRelConverter.java | 76 +++++++++------------- .../apache/calcite/test/SqlToRelConverterTest.java | 10 +++ .../apache/calcite/test/SqlToRelConverterTest.xml | 15 +++++ core/src/test/resources/sql/agg.iq | 18 +++++ 5 files changed, 74 insertions(+), 47 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/sql2rel/AggConverter.java b/core/src/main/java/org/apache/calcite/sql2rel/AggConverter.java index 6193c33df4..65915b69e1 100644 --- a/core/src/main/java/org/apache/calcite/sql2rel/AggConverter.java +++ b/core/src/main/java/org/apache/calcite/sql2rel/AggConverter.java @@ -597,7 +597,7 @@ class AggConverter implements SqlVisitor<Void> { /** * If an expression is structurally identical to one of the group-by * expressions, returns a reference to the expression, otherwise returns - * null. + * -1. */ int lookupGroupExpr(SqlNode expr) { return SqlUtil.indexOfDeep(groupExprs, expr, Litmus.IGNORE); diff --git a/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java b/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java index 00bebe369f..571d6abc50 100644 --- a/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java +++ b/core/src/main/java/org/apache/calcite/sql2rel/SqlToRelConverter.java @@ -69,7 +69,6 @@ import org.apache.calcite.rel.logical.LogicalTableScan; import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.metadata.RelColumnMapping; import org.apache.calcite.rel.metadata.RelMetadataQuery; -import org.apache.calcite.rel.rel2sql.SqlImplementor; import org.apache.calcite.rel.stream.Delta; import org.apache.calcite.rel.stream.LogicalDelta; import org.apache.calcite.rel.type.RelDataType; @@ -1168,15 +1167,7 @@ public class SqlToRelConverter { final Blackboard bb, final SqlNode expr, RelOptUtil.Logic logic) { - replaceSubQueries(bb, expr, logic, null); - } - - private void replaceSubQueries( - final Blackboard bb, - final SqlNode expr, - RelOptUtil.Logic logic, - final SqlImplementor.@Nullable Clause clause) { - findSubQueries(bb, expr, logic, false, clause); + findSubQueries(bb, expr, logic, false); for (SubQuery node : bb.subQueryList) { substituteSubQuery(bb, node); } @@ -2044,14 +2035,17 @@ public class SqlToRelConverter { * corresponds to a variation of a select * node, only register it if it's a scalar * sub-query - * @param clause A clause inside which sub-query is searched */ private void findSubQueries( Blackboard bb, SqlNode node, RelOptUtil.Logic logic, - boolean registerOnlyScalarSubQueries, - SqlImplementor.@Nullable Clause clause) { + boolean registerOnlyScalarSubQueries) { + // If node is structurally identical to one of the group-by, + // then it is not a sub-query; it is a reference. + if (bb.agg != null && bb.agg.lookupGroupExpr(node) != -1) { + return; + } final SqlKind kind = node.getKind(); switch (kind) { case EXISTS: @@ -2066,7 +2060,7 @@ public class SqlToRelConverter { case SCALAR_QUERY: if (!registerOnlyScalarSubQueries || (kind == SqlKind.SCALAR_QUERY)) { - bb.registerSubQuery(node, RelOptUtil.Logic.TRUE_FALSE, clause); + bb.registerSubQuery(node, RelOptUtil.Logic.TRUE_FALSE); } return; case IN: @@ -2098,7 +2092,7 @@ public class SqlToRelConverter { findSubQueries(bb, operand, logic, kind == SqlKind.IN || kind == SqlKind.NOT_IN || kind == SqlKind.SOME || kind == SqlKind.ALL - || registerOnlyScalarSubQueries, clause); + || registerOnlyScalarSubQueries); } } } else if (node instanceof SqlNodeList) { @@ -2106,7 +2100,7 @@ public class SqlToRelConverter { findSubQueries(bb, child, logic, kind == SqlKind.IN || kind == SqlKind.NOT_IN || kind == SqlKind.SOME || kind == SqlKind.ALL - || registerOnlyScalarSubQueries, clause); + || registerOnlyScalarSubQueries); } } @@ -2140,13 +2134,11 @@ public class SqlToRelConverter { && ((SqlQuantifyOperator) ((SqlCall) node).getOperator()) .tryDeriveTypeForCollection(bb.getValidator(), bb.scope, (SqlCall) node) != null) { - findSubQueries(bb, ((SqlCall) node).operand(0), logic, registerOnlyScalarSubQueries, - clause); - findSubQueries(bb, ((SqlCall) node).operand(1), logic, registerOnlyScalarSubQueries, - clause); + findSubQueries(bb, ((SqlCall) node).operand(0), logic, registerOnlyScalarSubQueries); + findSubQueries(bb, ((SqlCall) node).operand(1), logic, registerOnlyScalarSubQueries); break; } - bb.registerSubQuery(node, logic, clause); + bb.registerSubQuery(node, logic); break; default: break; @@ -3523,13 +3515,13 @@ public class SqlToRelConverter { // also replace sub-queries inside ordering spec in the aggregates replaceSubQueries(bb, aggregateFinder.orderList, RelOptUtil.Logic.TRUE_FALSE_UNKNOWN); + // If group-by clause is missing, pretend that it has zero elements. if (groupList == null) { groupList = SqlNodeList.EMPTY; } - replaceSubQueries(bb, groupList, RelOptUtil.Logic.TRUE_FALSE_UNKNOWN, - SqlImplementor.Clause.GROUP_BY); + replaceSubQueries(bb, groupList, RelOptUtil.Logic.TRUE_FALSE_UNKNOWN); // register the group exprs @@ -3633,8 +3625,7 @@ public class SqlToRelConverter { // This needs to be done separately from the sub-query inside // any aggregate in the select list, and after the aggregate rel // is allocated. - replaceSubQueries(bb, selectList, RelOptUtil.Logic.TRUE_FALSE_UNKNOWN, - SqlImplementor.Clause.SELECT); + replaceSubQueries(bb, selectList, RelOptUtil.Logic.TRUE_FALSE_UNKNOWN); // Now sub-queries in the entire select list have been converted. // Convert the select expressions to get the final list to be @@ -5534,30 +5525,26 @@ public class SqlToRelConverter { } } - void registerSubQuery(SqlNode node, RelOptUtil.Logic logic, - SqlImplementor.@Nullable Clause clause) { - if (getSubQuery(node, clause) == null) { - subQueryList.add(new SubQuery(node, logic, clause)); + void registerSubQuery(SqlNode node, RelOptUtil.Logic logic) { + for (SubQuery subQuery : subQueryList) { + // Compare the reference to make sure the matched node has + // exact scope where it belongs. + if (node == subQuery.node) { + return; + } } + subQueryList.add(new SubQuery(node, logic)); } - @Nullable SubQuery getSubQuery(SqlNode expr, SqlImplementor.@Nullable Clause exprClause) { + @Nullable SubQuery getSubQuery(SqlNode expr) { for (SubQuery subQuery : subQueryList) { // Compare the reference to make sure the matched node has // exact scope where it belongs. if (expr == subQuery.node) { return subQuery; } - - // Reference comparing does not work in case when select list has column which refers - // to the column inside `GROUP BY` clause. - // For example: SELECT deptno IN (1,2) FROM emp.deptno GROUP BY deptno IN (1,2); - if (exprClause == SqlImplementor.Clause.SELECT - && subQuery.clause == SqlImplementor.Clause.GROUP_BY - && expr.equalsDeep(subQuery.node, Litmus.IGNORE)) { - return subQuery; - } } + return null; } @@ -5732,7 +5719,7 @@ public class SqlToRelConverter { case CURSOR: case IN: case NOT_IN: - subQuery = getSubQuery(expr, null); + subQuery = getSubQuery(expr); if (subQuery == null && (kind == SqlKind.SOME || kind == SqlKind.ALL)) { break; } @@ -5748,7 +5735,7 @@ public class SqlToRelConverter { case ARRAY_QUERY_CONSTRUCTOR: case MAP_QUERY_CONSTRUCTOR: case MULTISET_QUERY_CONSTRUCTOR: - subQuery = requireNonNull(getSubQuery(expr, null)); + subQuery = requireNonNull(getSubQuery(expr)); rex = requireNonNull(subQuery.expr); if (((kind == SqlKind.SCALAR_QUERY) @@ -5934,7 +5921,7 @@ public class SqlToRelConverter { } @Override public RexRangeRef getSubQueryExpr(SqlCall call) { - final SubQuery subQuery = requireNonNull(getSubQuery(call, null)); + final SubQuery subQuery = requireNonNull(getSubQuery(call)); return (RexRangeRef) requireNonNull(subQuery.expr, () -> "subQuery.expr for " + call); } @@ -6277,13 +6264,10 @@ public class SqlToRelConverter { final SqlNode node; final RelOptUtil.Logic logic; @Nullable RexNode expr; - final SqlImplementor.@Nullable Clause clause; - private SubQuery(SqlNode node, RelOptUtil.Logic logic, - SqlImplementor.@Nullable Clause clause) { + private SubQuery(SqlNode node, RelOptUtil.Logic logic) { this.node = node; this.logic = logic; - this.clause = clause; } } diff --git a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java index 0e2e835846..cdec09e5ee 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java @@ -523,6 +523,16 @@ class SqlToRelConverterTest extends SqlToRelTestBase { sql(sql).ok(); } + /** Test case for + * <a href="https://issues.apache.org/jira/browse/CALCITE-4549">[CALCITE-4549] + * IndexOutOfBoundsException when group view by a sub query</a>. */ + @Test void testGroupView() { + final String sql = "SELECT case when ENAME in( 'a', 'b') then 'c' else 'd' end\n" + + "from EMP_20\n" + + "group by case when ENAME in( 'a', 'b') then 'c' else 'd' end"; + sql(sql).ok(); + } + @Test void testGroupExpressionsInsideAndOut() { // Expressions inside and outside aggs. Common sub-expressions should be // eliminated: 'sal' always translates to expression #2. diff --git a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml index 8fa9233809..5195579f8f 100644 --- a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml @@ -2560,6 +2560,21 @@ LogicalAggregate(group=[{0}], SUM_SAL=[SUM($1)]) <![CDATA[select deptno, sum(sal) as sum_sal from emp group by deptno]]> </Resource> </TestCase> + <TestCase name="testGroupView"> + <Resource name="sql"> + <![CDATA[SELECT case when ENAME in( 'a', 'b') then 'c' else 'd' end +from EMP_20 +group by case when ENAME in( 'a', 'b') then 'c' else 'd' end]]> + </Resource> + <Resource name="plan"> + <![CDATA[ +LogicalAggregate(group=[{0}]) + LogicalProject(EXPR$0=[CASE(SEARCH($1, Sarg['a':VARCHAR(20), 'b':VARCHAR(20)]:VARCHAR(20)), 'c', 'd')]) + LogicalFilter(condition=[AND(=($7, 20), >($5, 1000))]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +]]> + </Resource> + </TestCase> <TestCase name="testGroupingFunction"> <Resource name="sql"> <![CDATA[select diff --git a/core/src/test/resources/sql/agg.iq b/core/src/test/resources/sql/agg.iq index d459e62e74..3c5230b583 100644 --- a/core/src/test/resources/sql/agg.iq +++ b/core/src/test/resources/sql/agg.iq @@ -3684,6 +3684,24 @@ group by !ok +select + deptno, + case when deptno in (10) then 1 else 2 end as col1, + case when deptno in (10) then 5 else 6 end as col2 +from emp +group by deptno, case when deptno in (10) then 5 else 6 end +order by deptno; ++--------+------+------+ +| DEPTNO | COL1 | COL2 | ++--------+------+------+ +| 10 | 1 | 5 | +| 20 | 2 | 6 | +| 30 | 2 | 6 | ++--------+------+------+ +(3 rows) + +!ok + # Test case for [CALCITE-5388] tempList expression inside EnumerableWindow.getPartitionIterator should be unoptimized with CTE1(rownr1, val1) as ( select ROW_NUMBER() OVER(ORDER BY id ASC), id from (values (1), (2)) as Vals1(id) ),