[
https://issues.apache.org/jira/browse/CALCITE-2235?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Dmitri Shtilman updated CALCITE-2235:
-------------------------------------
Description:
Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays.
{{CREATE TABLE str (}}
{{id INTEGER,}}
{{strings TEXT[])}}
In this plan IS NULL filter is present
{{select * from str where strings is null;}}
{{LogicalProject(id=[$0], strings=[$1])}}
{{ LogicalFilter(condition=[IS NULL($1)])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
But IS NOT NULL filter is lost
{{select * from str where strings is not null;}}
{{LogicalProject(id=[$0], strings=[$1])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
IS NOT NULL is not lost for scalar
{{select * from str where id is not null;}}
{{LogicalProject(id=[$0], strings=[$1])}}
{{ LogicalFilter(condition=[IS NOT NULL($0)])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
This is not specific to string arrays, here is the same issue with integer
arrays
{{CREATE TABLE arr_table (}}
{{ id BIGINT,}}
{{ arr INTEGER[])}}
Here the IS NULL filter is kept
{{select * from arr_table where arr is null;}}
{{LogicalProject(id=[$0], arr=[$1])}}
{{ LogicalFilter(condition=[IS NULL($1)])}}
{{ EnumerableTableScan(table=[[cat, arr_table]])}}
But IS NOT NULL is lost
{{select * from arr_table where arr is not null;}}
{{LogicalProject(id=[$0], arr=[$1])}}
{{ EnumerableTableScan(table=[[cat, arr_table]])}}
Here IS NULL is either lost or optimized away from the EXPR, it just picks
arr[1]
{{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}}
{{LogicalProject(EXPR$0=[ITEM($1, 1)])}}
{{ EnumerableTableScan(table=[[cat, arr_table]])}}
Checking on the integer id instead brings back the case with IS NULL:
{{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}}
{{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}}
{{ EnumerableTableScan(table=[[cat, arr_table]])}}
{{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL then
UNNEST(strings) END from str group by strings;}}
{{LogicalProject(EXPR$0=[UNNEST($0)])}}
{{ LogicalAggregate(group=[0])}}
{{ LogicalProject(strings=[$1])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
Case disappears from expression in the plan above but switching to scalar bring
it back:
{{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then
UNNEST(strings) END from str group by id,strings;}}
{{LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0),
UNNEST($1), null)])}}
{{ LogicalAggregate(group=[0, 1])}}
{{ LogicalProject(id=[$0], strings=[$1])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
was:
Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays.
{{CREATE TABLE str (}}
{{id INTEGER,}}
{{strings TEXT[])}}
In this plan IS NULL filter is present
{{select * from str where strings is null;}}
{{LogicalProject(id=[$0], strings=[$1])}}
{{ LogicalFilter(condition=[IS NULL($1)])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
But IS NOT NULL filter is lost
{{select * from str where strings is not null;}}
{{LogicalProject(id=[$0], strings=[$1])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
IS NOT NULL is not lost for scalar
{{select * from str where id is not null;}}
{{LogicalProject(id=[$0], strings=[$1])}}
{{ LogicalFilter(condition=[IS NOT NULL($0)])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
This is not specific to string arrays, here is the same issue with integer
arrays
{{CREATE TABLE arr_table (}}
{{ id BIGINT,}}
{{ arr INTEGER[])}}
Here the IS NULL filter is kept
{{select * from arr_table where arr is null;}}
{{LogicalProject(id=[$0], arr=[$1])}}
{{ LogicalFilter(condition=[IS NULL($1)])}}
{{ EnumerableTableScan(table=[[cat, arr_table]])}}
But IS NOT NULL is lost
{{select * from arr_table where arr is not null;}}
{{LogicalProject(id=[$0], arr=[$1])}}
{{ EnumerableTableScan(table=[[cat, arr_table]])}}
Here IS NULL is either lost or optimized away from the EXPR, it just picks
arr[1]
{{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}}
{{LogicalProject(EXPR$0=[ITEM($1, 1)])}}
{{ EnumerableTableScan(table=[[cat, arr_table]])}}
Checking on the integer id instead brings back the case with IS NULL:
{{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}}
{{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}}
{{ EnumerableTableScan(table=[[cat, arr_table]])}}
{{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL then
UNNEST(strings) END from str group by strings;}}
{{LogicalProject(EXPR$0=[UNNEST($0)])}}
{{ LogicalAggregate(group=[\\{0}])}}
{{ LogicalProject(strings=[$1])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
Case disappears from expression in the plan above but switching to scalar bring
it back:
{{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then
UNNEST(strings) END from str group by id,strings;}}
{{LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0),
UNNEST($1), null)])}}
{{ LogicalAggregate(group=[0, 1])}}
{{ LogicalProject(id=[$0], strings=[$1])}}
{{ EnumerableTableScan(table=[[cat, str]])}}
> IS NULL, IS NOT NULL seem to be mishandled for arrays
> -----------------------------------------------------
>
> Key: CALCITE-2235
> URL: https://issues.apache.org/jira/browse/CALCITE-2235
> Project: Calcite
> Issue Type: Bug
> Components: core
> Affects Versions: 1.14.0
> Reporter: Dmitri Shtilman
> Assignee: Julian Hyde
> Priority: Major
>
> Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays.
>
> {{CREATE TABLE str (}}
> {{id INTEGER,}}
> {{strings TEXT[])}}
>
> In this plan IS NULL filter is present
> {{select * from str where strings is null;}}
> {{LogicalProject(id=[$0], strings=[$1])}}
> {{ LogicalFilter(condition=[IS NULL($1)])}}
> {{ EnumerableTableScan(table=[[cat, str]])}}
> But IS NOT NULL filter is lost
> {{select * from str where strings is not null;}}
> {{LogicalProject(id=[$0], strings=[$1])}}
> {{ EnumerableTableScan(table=[[cat, str]])}}
> IS NOT NULL is not lost for scalar
> {{select * from str where id is not null;}}
> {{LogicalProject(id=[$0], strings=[$1])}}
> {{ LogicalFilter(condition=[IS NOT NULL($0)])}}
> {{ EnumerableTableScan(table=[[cat, str]])}}
>
> This is not specific to string arrays, here is the same issue with integer
> arrays
> {{CREATE TABLE arr_table (}}
> {{ id BIGINT,}}
> {{ arr INTEGER[])}}
>
> Here the IS NULL filter is kept
> {{select * from arr_table where arr is null;}}
> {{LogicalProject(id=[$0], arr=[$1])}}
> {{ LogicalFilter(condition=[IS NULL($1)])}}
> {{ EnumerableTableScan(table=[[cat, arr_table]])}}
> But IS NOT NULL is lost
> {{select * from arr_table where arr is not null;}}
> {{LogicalProject(id=[$0], arr=[$1])}}
> {{ EnumerableTableScan(table=[[cat, arr_table]])}}
>
> Here IS NULL is either lost or optimized away from the EXPR, it just picks
> arr[1]
> {{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}}
> {{LogicalProject(EXPR$0=[ITEM($1, 1)])}}
> {{ EnumerableTableScan(table=[[cat, arr_table]])}}
> Checking on the integer id instead brings back the case with IS NULL:
> {{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}}
> {{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}}
> {{ EnumerableTableScan(table=[[cat, arr_table]])}}
>
> {{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL
> then UNNEST(strings) END from str group by strings;}}
> {{LogicalProject(EXPR$0=[UNNEST($0)])}}
> {{ LogicalAggregate(group=[0])}}
> {{ LogicalProject(strings=[$1])}}
> {{ EnumerableTableScan(table=[[cat, str]])}}
>
> Case disappears from expression in the plan above but switching to scalar
> bring it back:
> {{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then
> UNNEST(strings) END from str group by id,strings;}}
> {{LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0),
> UNNEST($1), null)])}}
> {{ LogicalAggregate(group=[0, 1])}}
> {{ LogicalProject(id=[$0], strings=[$1])}}
> {{ EnumerableTableScan(table=[[cat, str]])}}
>
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)