[ 
https://issues.apache.org/jira/browse/CALCITE-5209?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ian Bertolacci updated CALCITE-5209:
------------------------------------
    Description: 
For these examples assume that:
- {{`SqlToRelConverter.Config.config().withInSubQueryThreshold(5)`}} has been 
set.
- The source table has 4 columns (Column_0 through Column_3)
- Each column is of type BIGINT NOT NULL

The failing query is:
{code}
select
    case
        when Column_3 in (1, 2, 3, 4, 5) THEN 1
        else 0
    end
from T1000
group by
    case
        when Column_3 in (1, 2, 3, 4, 5) THEN 1
        else 0
    end
{code}

The exception is:
{code}
3
java.lang.ArrayIndexOutOfBoundsException: 3
        at 
com.google.common.collect.RegularImmutableList.get(RegularImmutableList.java:75)
        at org.apache.calcite.tools.RelBuilder.inferAlias(RelBuilder.java:2163)
        at org.apache.calcite.tools.RelBuilder.project_(RelBuilder.java:1956)
        at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1797)
        at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1769)
        at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1758)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter$Blackboard.register(SqlToRelConverter.java:4680)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.substituteSubQuery(SqlToRelConverter.java:1268)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.replaceSubQueries(SqlToRelConverter.java:1127)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.createAggImpl(SqlToRelConverter.java:3325)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertAgg(SqlToRelConverter.java:3192)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertSelectImpl(SqlToRelConverter.java:738)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertSelect(SqlToRelConverter.java:664)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertQueryRecursive(SqlToRelConverter.java:3589)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertQuery(SqlToRelConverter.java:589)
        at org.apache.calcite.prepare.PlannerImpl.rel(PlannerImpl.java:259)
{code}

The table could contain N >= 4 columns, and the use of any column at or after 
index 3 will also cause this exception.
The use of any column  before index 3 gives the RelNode tree:
{code}
42:LogicalProject(EXPR$0=[$0])
  41:LogicalJoin(condition=[=($3, $4)], joinType=[left])
    37:LogicalProject(EXPR$0=[$0], $f0=[$1], $f1=[$2], $f10=[$2])
      36:LogicalJoin(condition=[true], joinType=[inner])
        32:LogicalAggregate(group=[{0}])
          31:LogicalProject(EXPR$0=[CASE(CAST(OR(AND(IS NOT NULL($8), <>($4, 
0)), AND(<($5, $4), null, <>($4, 0), IS NULL($8)))):BOOLEAN NOT NULL, 1, 0)])
            30:LogicalJoin(condition=[=($6, $7)], joinType=[left])
              26:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
Column_3=[$3], $f0=[$4], $f1=[$5], Column_20=[$2])
                25:LogicalJoin(condition=[true], joinType=[inner])
                  21:TableScan(....)
                  24:LogicalAggregate(group=[{}], agg#0=[COUNT()], 
agg#1=[COUNT($0)])
                    23:LogicalProject(ROW_VALUE=[$0], $f1=[true])
                      22:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 
}]])
              29:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
                28:LogicalProject(ROW_VALUE=[$0], $f1=[true])
                  27:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
        35:LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
          34:LogicalProject(ROW_VALUE=[$0], $f1=[true])
            33:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
    40:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
      39:LogicalProject(ROW_VALUE=[$0], $f1=[true])
        38:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
{code}

If the number of predicates is less than the subquery threshold, there are no 
issues.

Additionally, the below queries also produce RelNode trees
Using an alias:
{code}
select
    case
        when Column_3 in (1, 2, 3, 4, 5) THEN 1
        else 0
    end as CASE_ALIAS
from T1000
group by
    CASE_ALIAS
{code}
{code}
23:LogicalAggregate(group=[{0}])
  22:LogicalProject(CASE_ALIAS=[CASE(AND(<>($4, 0), IS NOT NULL($8)), 1, 0)])
    21:LogicalJoin(condition=[=($6, $7)], joinType=[left])
      17:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
Column_3=[$3], $f0=[$4], $f1=[$5], Column_30=[$3])
        16:LogicalJoin(condition=[true], joinType=[inner])
          12:TableScan(...)
          15:LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
            14:LogicalProject(ROW_VALUE=[$0], $f1=[true])
              13:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
      20:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
        19:LogicalProject(ROW_VALUE=[$0], $f1=[true])
          18:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
{code}

Not using the group-by value in the projection:
{code}
select
    count(*)
from T1000
group by
    case
        when Column_3 in (1, 2, 3, 4, 5) THEN 1
        else 0
    end
{code}
{code}
25:LogicalProject(EXPR$0=[$1])
  24:LogicalAggregate(group=[{0}], EXPR$0=[COUNT()])
    23:LogicalProject($f0=[CASE(AND(<>($4, 0), IS NOT NULL($8)), 1, 0)])
      22:LogicalJoin(condition=[=($6, $7)], joinType=[left])
        18:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
Column_3=[$3], $f0=[$4], $f1=[$5], Column_30=[$3])
          17:LogicalJoin(condition=[true], joinType=[inner])
            13:QueryTableScan(table=[[QUERY, T1000]], fields=[[0, 1, 2, 3]])
            16:LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
              15:LogicalProject(ROW_VALUE=[$0], $f1=[true])
                14:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
        21:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
          20:LogicalProject(ROW_VALUE=[$0], $f1=[true])
            19:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
{code}




  was:
For these examples assume that:
- {{`SqlToRelConverter.Config.config().withInSubQueryThreshold(5)`}} has been 
set.
- The source table has 4 columns (Column_0 through Column_3)
- Each column is of type BIGINT NOT NULL

The failing query is:
{code}
select
    case
        when Column_3 in (1, 2, 3, 4, 5) THEN 1
        else 0
    end
from T1000
group by
    case
        when Column_3 in (1, 2, 3, 4, 5) THEN 1
        else 0
    end
{code}

The exception is:
{code}
3
java.lang.ArrayIndexOutOfBoundsException: 3
        at 
com.google.common.collect.RegularImmutableList.get(RegularImmutableList.java:75)
        at org.apache.calcite.tools.RelBuilder.inferAlias(RelBuilder.java:2163)
        at org.apache.calcite.tools.RelBuilder.project_(RelBuilder.java:1956)
        at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1797)
        at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1769)
        at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1758)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter$Blackboard.register(SqlToRelConverter.java:4680)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.substituteSubQuery(SqlToRelConverter.java:1268)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.replaceSubQueries(SqlToRelConverter.java:1127)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.createAggImpl(SqlToRelConverter.java:3325)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertAgg(SqlToRelConverter.java:3192)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertSelectImpl(SqlToRelConverter.java:738)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertSelect(SqlToRelConverter.java:664)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertQueryRecursive(SqlToRelConverter.java:3589)
        at 
org.apache.calcite.sql2rel.SqlToRelConverter.convertQuery(SqlToRelConverter.java:589)
        at org.apache.calcite.prepare.PlannerImpl.rel(PlannerImpl.java:259)
{code}

The table could contain N >= 4 columns, and the use of any column at or after 
index 3 will also cause this exception.
The use of any column  before index 3 gives the RelNode tree:
{code}
42:LogicalProject(EXPR$0=[$0])
  41:LogicalJoin(condition=[=($3, $4)], joinType=[left])
    37:LogicalProject(EXPR$0=[$0], $f0=[$1], $f1=[$2], $f10=[$2])
      36:LogicalJoin(condition=[true], joinType=[inner])
        32:LogicalAggregate(group=[{0}])
          31:LogicalProject(EXPR$0=[CASE(CAST(OR(AND(IS NOT NULL($8), <>($4, 
0)), AND(<($5, $4), null, <>($4, 0), IS NULL($8)))):BOOLEAN NOT NULL, 1, 0)])
            30:LogicalJoin(condition=[=($6, $7)], joinType=[left])
              26:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
Column_3=[$3], $f0=[$4], $f1=[$5], Column_20=[$2])
                25:LogicalJoin(condition=[true], joinType=[inner])
                  21:TableScan(....)
                  24:LogicalAggregate(group=[{}], agg#0=[COUNT()], 
agg#1=[COUNT($0)])
                    23:LogicalProject(ROW_VALUE=[$0], $f1=[true])
                      22:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 
}]])
              29:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
                28:LogicalProject(ROW_VALUE=[$0], $f1=[true])
                  27:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
        35:LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
          34:LogicalProject(ROW_VALUE=[$0], $f1=[true])
            33:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
    40:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
      39:LogicalProject(ROW_VALUE=[$0], $f1=[true])
        38:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
{code}

If the number of predicates is less than the subquery threshold, there are no 
issues.

Additionally, the below queries also produce RelNode trees
Using an alias:
{code}
select
    case
        when Column_3 in (1, 2, 3, 4, 5) THEN 1
        else 0
    end as CASE_ALIAS
from T1000
group by
    CASE_ALIAS
{code}
{code}
23:LogicalAggregate(group=[{0}])
  22:LogicalProject(CASE_ALIAS=[CASE(AND(<>($4, 0), IS NOT NULL($8)), 1, 0)])
    21:LogicalJoin(condition=[=($6, $7)], joinType=[left])
      17:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
Column_3=[$3], $f0=[$4], $f1=[$5], Column_30=[$3])
        16:LogicalJoin(condition=[true], joinType=[inner])
          12:TableScan(...)
          15:LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
            14:LogicalProject(ROW_VALUE=[$0], $f1=[true])
              13:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
      20:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
        19:LogicalProject(ROW_VALUE=[$0], $f1=[true])
          18:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
{code}

Not using the group-by value in the projection
{code}
select
    count(*)
from T1000
group by
    case
        when Column_3 in (1, 2, 3, 4, 5) THEN 1
        else 0
    end
{code}
{code}
25:LogicalProject(EXPR$0=[$1])
  24:LogicalAggregate(group=[{0}], EXPR$0=[COUNT()])
    23:LogicalProject($f0=[CASE(AND(<>($4, 0), IS NOT NULL($8)), 1, 0)])
      22:LogicalJoin(condition=[=($6, $7)], joinType=[left])
        18:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
Column_3=[$3], $f0=[$4], $f1=[$5], Column_30=[$3])
          17:LogicalJoin(condition=[true], joinType=[inner])
            13:QueryTableScan(table=[[QUERY, T1000]], fields=[[0, 1, 2, 3]])
            16:LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
              15:LogicalProject(ROW_VALUE=[$0], $f1=[true])
                14:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
        21:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
          20:LogicalProject(ROW_VALUE=[$0], $f1=[true])
            19:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
{code}





> ArrayIndexOutOfBoundsException during SqlToRelConverter for group-by on 
> `case` having `in` expression predicates exceeding SqlRelConverter.Config 
> InSubQueryThreshold
> ---------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: CALCITE-5209
>                 URL: https://issues.apache.org/jira/browse/CALCITE-5209
>             Project: Calcite
>          Issue Type: Bug
>            Reporter: Ian Bertolacci
>            Priority: Major
>
> For these examples assume that:
> - {{`SqlToRelConverter.Config.config().withInSubQueryThreshold(5)`}} has been 
> set.
> - The source table has 4 columns (Column_0 through Column_3)
> - Each column is of type BIGINT NOT NULL
> The failing query is:
> {code}
> select
>     case
>         when Column_3 in (1, 2, 3, 4, 5) THEN 1
>         else 0
>     end
> from T1000
> group by
>     case
>         when Column_3 in (1, 2, 3, 4, 5) THEN 1
>         else 0
>     end
> {code}
> The exception is:
> {code}
> 3
> java.lang.ArrayIndexOutOfBoundsException: 3
>       at 
> com.google.common.collect.RegularImmutableList.get(RegularImmutableList.java:75)
>       at org.apache.calcite.tools.RelBuilder.inferAlias(RelBuilder.java:2163)
>       at org.apache.calcite.tools.RelBuilder.project_(RelBuilder.java:1956)
>       at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1797)
>       at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1769)
>       at org.apache.calcite.tools.RelBuilder.project(RelBuilder.java:1758)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter$Blackboard.register(SqlToRelConverter.java:4680)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter.substituteSubQuery(SqlToRelConverter.java:1268)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter.replaceSubQueries(SqlToRelConverter.java:1127)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter.createAggImpl(SqlToRelConverter.java:3325)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter.convertAgg(SqlToRelConverter.java:3192)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter.convertSelectImpl(SqlToRelConverter.java:738)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter.convertSelect(SqlToRelConverter.java:664)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter.convertQueryRecursive(SqlToRelConverter.java:3589)
>       at 
> org.apache.calcite.sql2rel.SqlToRelConverter.convertQuery(SqlToRelConverter.java:589)
>       at org.apache.calcite.prepare.PlannerImpl.rel(PlannerImpl.java:259)
> {code}
> The table could contain N >= 4 columns, and the use of any column at or after 
> index 3 will also cause this exception.
> The use of any column  before index 3 gives the RelNode tree:
> {code}
> 42:LogicalProject(EXPR$0=[$0])
>   41:LogicalJoin(condition=[=($3, $4)], joinType=[left])
>     37:LogicalProject(EXPR$0=[$0], $f0=[$1], $f1=[$2], $f10=[$2])
>       36:LogicalJoin(condition=[true], joinType=[inner])
>         32:LogicalAggregate(group=[{0}])
>           31:LogicalProject(EXPR$0=[CASE(CAST(OR(AND(IS NOT NULL($8), <>($4, 
> 0)), AND(<($5, $4), null, <>($4, 0), IS NULL($8)))):BOOLEAN NOT NULL, 1, 0)])
>             30:LogicalJoin(condition=[=($6, $7)], joinType=[left])
>               26:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
> Column_3=[$3], $f0=[$4], $f1=[$5], Column_20=[$2])
>                 25:LogicalJoin(condition=[true], joinType=[inner])
>                   21:TableScan(....)
>                   24:LogicalAggregate(group=[{}], agg#0=[COUNT()], 
> agg#1=[COUNT($0)])
>                     23:LogicalProject(ROW_VALUE=[$0], $f1=[true])
>                       22:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 
> 5 }]])
>               29:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
>                 28:LogicalProject(ROW_VALUE=[$0], $f1=[true])
>                   27:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 
> }]])
>         35:LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
>           34:LogicalProject(ROW_VALUE=[$0], $f1=[true])
>             33:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
>     40:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
>       39:LogicalProject(ROW_VALUE=[$0], $f1=[true])
>         38:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
> {code}
> If the number of predicates is less than the subquery threshold, there are no 
> issues.
> Additionally, the below queries also produce RelNode trees
> Using an alias:
> {code}
> select
>     case
>         when Column_3 in (1, 2, 3, 4, 5) THEN 1
>         else 0
>     end as CASE_ALIAS
> from T1000
> group by
>     CASE_ALIAS
> {code}
> {code}
> 23:LogicalAggregate(group=[{0}])
>   22:LogicalProject(CASE_ALIAS=[CASE(AND(<>($4, 0), IS NOT NULL($8)), 1, 0)])
>     21:LogicalJoin(condition=[=($6, $7)], joinType=[left])
>       17:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
> Column_3=[$3], $f0=[$4], $f1=[$5], Column_30=[$3])
>         16:LogicalJoin(condition=[true], joinType=[inner])
>           12:TableScan(...)
>           15:LogicalAggregate(group=[{}], agg#0=[COUNT()], agg#1=[COUNT($0)])
>             14:LogicalProject(ROW_VALUE=[$0], $f1=[true])
>               13:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
>       20:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
>         19:LogicalProject(ROW_VALUE=[$0], $f1=[true])
>           18:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
> {code}
> Not using the group-by value in the projection:
> {code}
> select
>     count(*)
> from T1000
> group by
>     case
>         when Column_3 in (1, 2, 3, 4, 5) THEN 1
>         else 0
>     end
> {code}
> {code}
> 25:LogicalProject(EXPR$0=[$1])
>   24:LogicalAggregate(group=[{0}], EXPR$0=[COUNT()])
>     23:LogicalProject($f0=[CASE(AND(<>($4, 0), IS NOT NULL($8)), 1, 0)])
>       22:LogicalJoin(condition=[=($6, $7)], joinType=[left])
>         18:LogicalProject(Column_0=[$0], Column_1=[$1], Column_2=[$2], 
> Column_3=[$3], $f0=[$4], $f1=[$5], Column_30=[$3])
>           17:LogicalJoin(condition=[true], joinType=[inner])
>             13:QueryTableScan(table=[[QUERY, T1000]], fields=[[0, 1, 2, 3]])
>             16:LogicalAggregate(group=[{}], agg#0=[COUNT()], 
> agg#1=[COUNT($0)])
>               15:LogicalProject(ROW_VALUE=[$0], $f1=[true])
>                 14:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
>         21:LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
>           20:LogicalProject(ROW_VALUE=[$0], $f1=[true])
>             19:LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }]])
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to