[ https://issues.apache.org/jira/browse/CALCITE-2235?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dmitri Shtilman updated CALCITE-2235: ------------------------------------- Description: Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays. {{CREATE TABLE str (}} {{id INTEGER,}} {{strings TEXT[])}} In this plan IS NULL filter is present {{select * from str where strings is null;}} {{LogicalProject(id=[$0], strings=[$1])}} {{ LogicalFilter(condition=[IS NULL($1)])}} {{ EnumerableTableScan(table=[[cat, str]])}} But IS NOT NULL filter is lost {{select * from str where strings is not null;}} {{LogicalProject(id=[$0], strings=[$1])}} {{ EnumerableTableScan(table=[[cat, str]])}} IS NOT NULL is not lost for scalar {{select * from str where id is not null;}} {{LogicalProject(id=[$0], strings=[$1])}} {{ LogicalFilter(condition=[IS NOT NULL($0)])}} {{ EnumerableTableScan(table=[[cat, str]])}} This is not specific to string arrays, here is the same issue with integer arrays {{CREATE TABLE arr_table (}} {{ id BIGINT,}} {{ arr INTEGER[])}} Here the IS NULL filter is kept {{select * from arr_table where arr is null;}} {{LogicalProject(id=[$0], arr=[$1])}} {{ LogicalFilter(condition=[IS NULL($1)])}} {{ EnumerableTableScan(table=[[cat, arr_table]])}} But IS NOT NULL is lost {{select * from arr_table where arr is not null;}} {{LogicalProject(id=[$0], arr=[$1])}} {{ EnumerableTableScan(table=[[cat, arr_table]])}} Here IS NULL is either lost or optimized away from the EXPR, it just picks arr[1] {{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}} {{LogicalProject(EXPR$0=[ITEM($1, 1)])}} {{ EnumerableTableScan(table=[[cat, arr_table]])}} Checking on the integer id instead brings back the case with IS NULL: {{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}} {{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}} {{ EnumerableTableScan(table=[[cat, arr_table]])}} {{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL then UNNEST(strings) END from str group by strings;}} {{LogicalProject(EXPR$0=[UNNEST($0)])}} {{ LogicalAggregate(group=[\\{0}])}} {{ LogicalProject(strings=[$1])}} {{ EnumerableTableScan(table=[[cat, str]])}} Case disappears from expression in the plan above but switching to scalar bring it back: {{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then UNNEST(strings) END from str group by id,strings;}} {{LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0), UNNEST($1), null)])}} {{ LogicalAggregate(group=[0, 1])}} {{ LogicalProject(id=[$0], strings=[$1])}} {{ EnumerableTableScan(table=[[cat, str]])}} was: Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays. {{CREATE TABLE str (}} {{id INTEGER,}} {{strings TEXT[])}} In this plan IS NULL filter is present {{select * from str where strings is null;}} {{LogicalProject(id=[$0], strings=[$1])}} {{ LogicalFilter(condition=[IS NULL($1)])}} {{ EnumerableTableScan(table=[[cat, str]])}} But IS NOT NULL filter is lost {{select * from str where strings is not null;}} {{LogicalProject(id=[$0], strings=[$1])}} {{ EnumerableTableScan(table=[[cat, str]])}} IS NOT NULL is not lost for scalar {{select * from str where id is not null;}} {{LogicalProject(id=[$0], strings=[$1])}} {{ LogicalFilter(condition=[IS NOT NULL($0)])}} {{ EnumerableTableScan(table=[[cat, str]])}} This is not specific to string arrays, here is the same issue with integer arrays {{CREATE TABLE arr_table (}} {{ id BIGINT,}} {{ arr INTEGER[])}} Here the IS NULL filter is kept {{select * from arr_table where arr is null;}} {{LogicalProject(id=[$0], arr=[$1])}} {{ LogicalFilter(condition=[IS NULL($1)])}} {{ EnumerableTableScan(table=[[cat, arr_table]])}} But IS NOT NULL is lost {{select * from arr_table where arr is not null;}} {{LogicalProject(id=[$0], arr=[$1])}} {{ EnumerableTableScan(table=[[cat, arr_table]])}} Here IS NULL is either lost or optimized away from the EXPR, it just picks arr[1] {{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}} {{LogicalProject(EXPR$0=[ITEM($1, 1)])}} {{ EnumerableTableScan(table=[[cat, arr_table]])}} Checking on the integer id instead brings back the case with IS NULL: {{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}} {{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}} {{ EnumerableTableScan(table=[[cat, arr_table]])}} {{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL then UNNEST(strings) END from str group by strings;}} {{LogicalProject(EXPR$0=[UNNEST($0)])}} {{ LogicalAggregate(group=[\\{0}])}} {{ LogicalProject(strings=[$1])}} {{ EnumerableTableScan(table=[[cat, str]])}} Case disappears from expression in the plan above but switching to scalar bring it back: {{ }} {{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then UNNEST(strings) END from str group by id,strings;}} {{LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0), UNNEST($1), null)])}} {{ LogicalAggregate(group=[\\{0, 1}])}} {{ LogicalProject(id=[$0], strings=[$1])}} {{ EnumerableTableScan(table=[[cat, str]])}} > IS NULL, IS NOT NULL seem to be mishandled for arrays > ----------------------------------------------------- > > Key: CALCITE-2235 > URL: https://issues.apache.org/jira/browse/CALCITE-2235 > Project: Calcite > Issue Type: Bug > Components: core > Affects Versions: 1.14.0 > Reporter: Dmitri Shtilman > Assignee: Julian Hyde > Priority: Major > > Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays. > > {{CREATE TABLE str (}} > {{id INTEGER,}} > {{strings TEXT[])}} > > In this plan IS NULL filter is present > {{select * from str where strings is null;}} > {{LogicalProject(id=[$0], strings=[$1])}} > {{ LogicalFilter(condition=[IS NULL($1)])}} > {{ EnumerableTableScan(table=[[cat, str]])}} > But IS NOT NULL filter is lost > {{select * from str where strings is not null;}} > {{LogicalProject(id=[$0], strings=[$1])}} > {{ EnumerableTableScan(table=[[cat, str]])}} > IS NOT NULL is not lost for scalar > {{select * from str where id is not null;}} > {{LogicalProject(id=[$0], strings=[$1])}} > {{ LogicalFilter(condition=[IS NOT NULL($0)])}} > {{ EnumerableTableScan(table=[[cat, str]])}} > > This is not specific to string arrays, here is the same issue with integer > arrays > {{CREATE TABLE arr_table (}} > {{ id BIGINT,}} > {{ arr INTEGER[])}} > > Here the IS NULL filter is kept > {{select * from arr_table where arr is null;}} > {{LogicalProject(id=[$0], arr=[$1])}} > {{ LogicalFilter(condition=[IS NULL($1)])}} > {{ EnumerableTableScan(table=[[cat, arr_table]])}} > But IS NOT NULL is lost > {{select * from arr_table where arr is not null;}} > {{LogicalProject(id=[$0], arr=[$1])}} > {{ EnumerableTableScan(table=[[cat, arr_table]])}} > > Here IS NULL is either lost or optimized away from the EXPR, it just picks > arr[1] > {{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}} > {{LogicalProject(EXPR$0=[ITEM($1, 1)])}} > {{ EnumerableTableScan(table=[[cat, arr_table]])}} > Checking on the integer id instead brings back the case with IS NULL: > {{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}} > {{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}} > {{ EnumerableTableScan(table=[[cat, arr_table]])}} > > {{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL > then UNNEST(strings) END from str group by strings;}} > {{LogicalProject(EXPR$0=[UNNEST($0)])}} > {{ LogicalAggregate(group=[\\{0}])}} > {{ LogicalProject(strings=[$1])}} > {{ EnumerableTableScan(table=[[cat, str]])}} > > Case disappears from expression in the plan above but switching to scalar > bring it back: > {{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then > UNNEST(strings) END from str group by id,strings;}} > {{LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0), > UNNEST($1), null)])}} > {{ LogicalAggregate(group=[0, 1])}} > {{ LogicalProject(id=[$0], strings=[$1])}} > {{ EnumerableTableScan(table=[[cat, str]])}} > -- This message was sent by Atlassian JIRA (v7.6.3#76005)