[ 
https://issues.apache.org/jira/browse/HIVE-29176?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Thomas Rebele updated HIVE-29176:
---------------------------------
    Description: 
Given the following script:
{code:java}
--! qt:dataset:src
--! qt:dataset:part
-- SORT_QUERY_RESULTS

create table t1_n55 as select cast(key as int) key, value from src where key <= 
10;

select * from t1_n55 sort by key;

create table t2_n33 as select cast(2*key as int) key, value from t1_n55 union 
select 10 key, null value;

select * from t2_n33 sort by key;

create table t3_n12 as select * from (select * from t1_n55 union all select * 
from t2_n33) b;
select * from t3_n12 sort by key, value;

SET hive.auto.convert.anti.join=true;
select "\nantijoin=true\n";

explain cbo select a.key, a.value from t1_n55 a left join t2_n33 b on 
a.key=b.key where b.value is null;
select a.key, a.value from t1_n55 a left join t2_n33 b on a.key=b.key where 
b.value is null;

SET hive.auto.convert.anti.join=false;
select "\nantijoin=false\n";

explain cbo select a.key, a.value from t1_n55 a left join t2_n33 b on 
a.key=b.key where b.value is null;
select a.key, a.value from t1_n55 a left join t2_n33 b on a.key=b.key where 
b.value is null;
{code}
There's a difference when executing the query with/without anti-join:

Result with antijoin:
{code:java}
2       val_2
5       val_5
5       val_5
5       val_5
9       val_9
{code}
Result without antijoin:
{code:java}
10      val_10
2       val_2
5       val_5
5       val_5
5       val_5
9       val_9
{code}

  was:
Using tables as defined in `ql/src/test/queries/clientpositive/antijoin.q`, 
except changing the definition of t2_n33 to:
{code:java}
create table t2_n33 as select cast(2*key as int) key, value from t1_n55 union 
select 10 key, null value;
{code}
There's a difference when executing the query
{code:java}
select a.key, a.value, ''+a.value+b.value from t1_n55 a left join t2_n33 b on 
a.key=b.key where b.value is null;
{code}
With HiveJoin, the result contains a row {{{}10 val_10 NULL{}}}, with 
HiveAntiJoin the row is missing.


> AntiJoin may not be introduced when the IS NULL filter is applied to a 
> nullable column
> --------------------------------------------------------------------------------------
>
>                 Key: HIVE-29176
>                 URL: https://issues.apache.org/jira/browse/HIVE-29176
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Thomas Rebele
>            Assignee: Thomas Rebele
>            Priority: Major
>
> Given the following script:
> {code:java}
> --! qt:dataset:src
> --! qt:dataset:part
> -- SORT_QUERY_RESULTS
> create table t1_n55 as select cast(key as int) key, value from src where key 
> <= 10;
> select * from t1_n55 sort by key;
> create table t2_n33 as select cast(2*key as int) key, value from t1_n55 union 
> select 10 key, null value;
> select * from t2_n33 sort by key;
> create table t3_n12 as select * from (select * from t1_n55 union all select * 
> from t2_n33) b;
> select * from t3_n12 sort by key, value;
> SET hive.auto.convert.anti.join=true;
> select "\nantijoin=true\n";
> explain cbo select a.key, a.value from t1_n55 a left join t2_n33 b on 
> a.key=b.key where b.value is null;
> select a.key, a.value from t1_n55 a left join t2_n33 b on a.key=b.key where 
> b.value is null;
> SET hive.auto.convert.anti.join=false;
> select "\nantijoin=false\n";
> explain cbo select a.key, a.value from t1_n55 a left join t2_n33 b on 
> a.key=b.key where b.value is null;
> select a.key, a.value from t1_n55 a left join t2_n33 b on a.key=b.key where 
> b.value is null;
> {code}
> There's a difference when executing the query with/without anti-join:
> Result with antijoin:
> {code:java}
> 2     val_2
> 5     val_5
> 5     val_5
> 5     val_5
> 9     val_9
> {code}
> Result without antijoin:
> {code:java}
> 10    val_10
> 2     val_2
> 5     val_5
> 5     val_5
> 5     val_5
> 9     val_9
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to