[ 
https://issues.apache.org/jira/browse/CALCITE-2235?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Dmitri Shtilman updated CALCITE-2235:
-------------------------------------
    Description: 
Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays.

 

{{CREATE TABLE str (}}
 {{id INTEGER,}}
 {{strings TEXT[])}}

 

In this plan IS NULL filter is present
 {{select * from str where strings is null;}}

{{LogicalProject(id=[$0], strings=[$1])}}
 {{  LogicalFilter(condition=[IS NULL($1)])}}
 {{    EnumerableTableScan(table=[[cat, str]])}}

But IS NOT NULL filter is lost
 {{select * from str where strings is not null;}}

{{LogicalProject(id=[$0], strings=[$1])}}
 {{  EnumerableTableScan(table=[[cat, str]])}}

IS NOT NULL is not lost for scalar
 {{select * from str where id is not null;}}

{{LogicalProject(id=[$0], strings=[$1])}}
 {{  LogicalFilter(condition=[IS NOT NULL($0)])}}
 {{    EnumerableTableScan(table=[[cat, str]])}}

 

This is not specific to string arrays, here is the same issue with integer 
arrays

{{CREATE TABLE arr_table (}}
 {{  id BIGINT,}}
 {{  arr INTEGER[])}}

 

Here the IS NULL filter is kept

{{select * from arr_table where arr is null;}}

{{LogicalProject(id=[$0], arr=[$1])}}
 {{  LogicalFilter(condition=[IS NULL($1)])}}
 {{    EnumerableTableScan(table=[[cat, arr_table]])}}

But IS NOT NULL is lost

{{select * from arr_table where arr is not null;}}

{{LogicalProject(id=[$0], arr=[$1])}}
 {{  EnumerableTableScan(table=[[cat, arr_table]])}}

 

Here IS NULL is either lost or optimized away from the EXPR, it just picks 
arr[1]

{{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}}

{{LogicalProject(EXPR$0=[ITEM($1, 1)])}}
 {{  EnumerableTableScan(table=[[cat, arr_table]])}}

Checking on the integer id instead brings back the case with IS NULL:

{{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}}

{{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}}
 {{  EnumerableTableScan(table=[[cat, arr_table]])}}

 

{{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL then 
UNNEST(strings) END from str group by strings;}}

LogicalProject(EXPR$0=[UNNEST($0)])
  LogicalAggregate(group=[\{0}])
    LogicalProject(strings=[$1])
       EnumerableTableScan(table=[[cat, str]])

 

Case disappears from expression in the plan above but switching to scalar bring 
it back:

{{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then 
UNNEST(strings) END from str group by id,strings;}}


 LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0), 
UNNEST($1), null)])
  LogicalAggregate(group=[\{0, 1}])
     LogicalProject(id=[$0], strings=[$1])
      EnumerableTableScan(table=[[cat, str]])

  was:
Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays.

 

{{CREATE TABLE str (}}
 {{id INTEGER,}}
 {{strings TEXT[])}}

 

In this plan IS NULL filter is present
 {{select * from str where strings is null;}}

{{LogicalProject(id=[$0], strings=[$1])}}
 {{  LogicalFilter(condition=[IS NULL($1)])}}
 {{    EnumerableTableScan(table=[[cat, str]])}}

But IS NOT NULL filter is lost
 {{select * from str where strings is not null;}}

{{LogicalProject(id=[$0], strings=[$1])}}
 {{  EnumerableTableScan(table=[[cat, str]])}}

IS NOT NULL is not lost for scalar
 {{select * from str where id is not null;}}

{{LogicalProject(id=[$0], strings=[$1])}}
 {{  LogicalFilter(condition=[IS NOT NULL($0)])}}
 {{    EnumerableTableScan(table=[[cat, str]])}}

 

This is not specific to string arrays, here is the same issue with integer 
arrays

{{CREATE TABLE arr_table (}}
 {{  id BIGINT,}}
 {{  arr INTEGER[])}}

 

Here the IS NULL filter is kept

{{select * from arr_table where arr is null;}}

{{LogicalProject(id=[$0], arr=[$1])}}
 {{  LogicalFilter(condition=[IS NULL($1)])}}
 {{    EnumerableTableScan(table=[[cat, arr_table]])}}

But IS NOT NULL is lost

{{select * from arr_table where arr is not null;}}

{{LogicalProject(id=[$0], arr=[$1])}}
 {{  EnumerableTableScan(table=[[cat, arr_table]])}}

 

Here IS NULL is either lost or optimized away from the EXPR, it just picks 
arr[1]

{{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}}

{{LogicalProject(EXPR$0=[ITEM($1, 1)])}}
 {{  EnumerableTableScan(table=[[cat, arr_table]])}}

Checking on the integer id instead brings back the case with IS NULL:

{{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}}

{{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}}
 {{  EnumerableTableScan(table=[[cat, arr_table]])}}

 

{{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL then 
UNNEST(strings) END from str group by strings;}}

{{LogicalProject(EXPR$0=[UNNEST($0)])}}
 {{  LogicalAggregate(group=[{0}])}}
 {{   LogicalProject(strings=[$1])}}
 {{       EnumerableTableScan(table=[[cat, str]])}}

 

Case disappears from expression in the plan above but switching to scalar bring 
it back:

 {{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then 
UNNEST(strings) END from str group by id,strings;}}

```
{{LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0), 
UNNEST($1), null)])}}
 {{  LogicalAggregate(group=[{0, 1}])}}
 {{    LogicalProject(id=[$0], strings=[$1])}}
 {{      EnumerableTableScan(table=[[cat, str]]) }}
```


> IS NULL, IS NOT NULL seem to be mishandled for arrays
> -----------------------------------------------------
>
>                 Key: CALCITE-2235
>                 URL: https://issues.apache.org/jira/browse/CALCITE-2235
>             Project: Calcite
>          Issue Type: Bug
>          Components: core
>    Affects Versions: 1.14.0
>            Reporter: Dmitri Shtilman
>            Assignee: Julian Hyde
>            Priority: Major
>
> Calcite seems to be mishandling IS NULL, IS NOT NULL on arrays.
>  
> {{CREATE TABLE str (}}
>  {{id INTEGER,}}
>  {{strings TEXT[])}}
>  
> In this plan IS NULL filter is present
>  {{select * from str where strings is null;}}
> {{LogicalProject(id=[$0], strings=[$1])}}
>  {{  LogicalFilter(condition=[IS NULL($1)])}}
>  {{    EnumerableTableScan(table=[[cat, str]])}}
> But IS NOT NULL filter is lost
>  {{select * from str where strings is not null;}}
> {{LogicalProject(id=[$0], strings=[$1])}}
>  {{  EnumerableTableScan(table=[[cat, str]])}}
> IS NOT NULL is not lost for scalar
>  {{select * from str where id is not null;}}
> {{LogicalProject(id=[$0], strings=[$1])}}
>  {{  LogicalFilter(condition=[IS NOT NULL($0)])}}
>  {{    EnumerableTableScan(table=[[cat, str]])}}
>  
> This is not specific to string arrays, here is the same issue with integer 
> arrays
> {{CREATE TABLE arr_table (}}
>  {{  id BIGINT,}}
>  {{  arr INTEGER[])}}
>  
> Here the IS NULL filter is kept
> {{select * from arr_table where arr is null;}}
> {{LogicalProject(id=[$0], arr=[$1])}}
>  {{  LogicalFilter(condition=[IS NULL($1)])}}
>  {{    EnumerableTableScan(table=[[cat, arr_table]])}}
> But IS NOT NULL is lost
> {{select * from arr_table where arr is not null;}}
> {{LogicalProject(id=[$0], arr=[$1])}}
>  {{  EnumerableTableScan(table=[[cat, arr_table]])}}
>  
> Here IS NULL is either lost or optimized away from the EXPR, it just picks 
> arr[1]
> {{select case when arr IS NULL THEN -111 else arr[1] end from arr_table;}}
> {{LogicalProject(EXPR$0=[ITEM($1, 1)])}}
>  {{  EnumerableTableScan(table=[[cat, arr_table]])}}
> Checking on the integer id instead brings back the case with IS NULL:
> {{select case when id IS NULL THEN -111 else arr[1] end from arr_table;}}
> {{LogicalProject(EXPR$0=[CASE(IS NULL($0), -111, ITEM($1, 1))])}}
>  {{  EnumerableTableScan(table=[[cat, arr_table]])}}
>  
> {{select case when strings IS NULL THEN 'unknown' when strings IS NOT NULL 
> then UNNEST(strings) END from str group by strings;}}
> LogicalProject(EXPR$0=[UNNEST($0)])
>   LogicalAggregate(group=[\{0}])
>     LogicalProject(strings=[$1])
>        EnumerableTableScan(table=[[cat, str]])
>  
> Case disappears from expression in the plan above but switching to scalar 
> bring it back:
> {{select case when id IS NULL THEN 'unknown' when id IS NOT NULL then 
> UNNEST(strings) END from str group by id,strings;}}
>  LogicalProject(EXPR$0=[CASE(IS NULL($0), 'unknown', IS NOT NULL($0), 
> UNNEST($1), null)])
>   LogicalAggregate(group=[\{0, 1}])
>      LogicalProject(id=[$0], strings=[$1])
>       EnumerableTableScan(table=[[cat, str]])



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to