Denys Kuzmenko created HIVE-27801:
-------------------------------------

             Summary: Exists subquery rewrite into LEFT SEMI JOIN produce 
incorrect plan
                 Key: HIVE-27801
                 URL: https://issues.apache.org/jira/browse/HIVE-27801
             Project: Hive
          Issue Type: Bug
            Reporter: Denys Kuzmenko


reproduce:
{code}
set hive.explain.user=false;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.stats.autogather=false;
set hive.exec.dynamic.partition.mode=nonstrict;

drop table if exists store_sales;

create table store_sales (ss_promo_sk int, ss_sales_price int, ss_list_price 
int) stored as orc tblproperties('transactional'='true');
insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15);

explain cbo 
select * from store_sales A where exists ( 
select 1 from store_sales B 
    where a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price);
    
select * from store_sales A where exists( 
select 1 from store_sales B 
    where A.ss_promo_sk=B.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price);  
    
explain cbo    
select * from store_sales A 
LEFT SEMI JOIN store_sales B 
ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price;     

select * from store_sales A 
LEFT SEMI JOIN store_sales B 
ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and 
A.ss_sales_price<B.ss_sales_price;    
{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to