Denys Kuzmenko created HIVE-27801:
-------------------------------------
Summary: Exists subquery rewrite into LEFT SEMI JOIN produce
incorrect plan
Key: HIVE-27801
URL: https://issues.apache.org/jira/browse/HIVE-27801
Project: Hive
Issue Type: Bug
Reporter: Denys Kuzmenko
reproduce:
{code}
set hive.explain.user=false;
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.stats.autogather=false;
set hive.exec.dynamic.partition.mode=nonstrict;
drop table if exists store_sales;
create table store_sales (ss_promo_sk int, ss_sales_price int, ss_list_price
int) stored as orc tblproperties('transactional'='true');
insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15);
explain cbo
select * from store_sales A where exists (
select 1 from store_sales B
where a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and
A.ss_sales_price<B.ss_sales_price);
select * from store_sales A where exists(
select 1 from store_sales B
where A.ss_promo_sk=B.ss_promo_sk and A.ss_sales_price>B.ss_list_price and
A.ss_sales_price<B.ss_sales_price);
explain cbo
select * from store_sales A
LEFT SEMI JOIN store_sales B
ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and
A.ss_sales_price<B.ss_sales_price;
select * from store_sales A
LEFT SEMI JOIN store_sales B
ON a.ss_promo_sk=b.ss_promo_sk and A.ss_sales_price>B.ss_list_price and
A.ss_sales_price<B.ss_sales_price;
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)