This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 841be7f43e7 HIVE-27801: Exists subquery rewrite results in a wrong
plan (Denys Kuzmenko, reviewed by Attila Turoczy, Ayush Saxena)
841be7f43e7 is described below
commit 841be7f43e73387c1024c26c26566f336f1972d3
Author: Denys Kuzmenko <[email protected]>
AuthorDate: Wed Dec 13 11:15:18 2023 +0200
HIVE-27801: Exists subquery rewrite results in a wrong plan (Denys
Kuzmenko, reviewed by Attila Turoczy, Ayush Saxena)
Closes #4922
---
.../calcite/rules/HiveRelDecorrelator.java | 16 ++----
.../subquery_complex_correlation_predicates.q | 15 +++++
.../subquery_complex_correlation_predicates.q.out | 66 ++++++++++++++++++++++
3 files changed, 85 insertions(+), 12 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index fd54c0fd689..f10a19e41ad 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -3014,18 +3014,10 @@ public final class HiveRelDecorrelator implements
ReflectiveVisitor {
}
/** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. */
private static class CorelMapBuilder extends HiveRelShuttleImpl {
- private final SortedMap<CorrelationId, RelNode> mapCorToCorRel =
- new TreeMap<>();
-
- private final SortedSetMultimap<RelNode, CorRef> mapRefRelToCorRef =
- Multimaps.newSortedSetMultimap(
- new HashMap<RelNode, Collection<CorRef>>(),
- new Supplier<TreeSet<CorRef>>() {
- @Override
- public TreeSet<CorRef> get() {
- return Sets.newTreeSet();
- }
- });
+ private final SortedMap<CorrelationId, RelNode> mapCorToCorRel = new
TreeMap<>();
+
+ private final Multimap<RelNode, CorRef> mapRefRelToCorRef =
+ Multimaps.newListMultimap(new HashMap<>(), Lists::newArrayList);
private final Map<RexFieldAccess, CorRef> mapFieldAccessToCorVar = new
HashMap<>();
diff --git
a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
index ccfed8dcb23..1d6dbbee8c0 100644
---
a/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
+++
b/ql/src/test/queries/clientpositive/subquery_complex_correlation_predicates.q
@@ -78,3 +78,18 @@ where not exists
(select a_authorkey
from author a
where coalesce(b.b_authorkey, 400) = coalesce(a.a_authorkey, 400));
+
+-- HIVE-27801: Exists subquery rewrite results in a wrong plan
+drop table if exists store_sales;
+create table store_sales (promo_sk int, sales_price int, list_price int);
+
+insert into store_sales values (1, 20, 15), (1, 15, 20), (1, 10, 15);
+
+explain cbo
+select * from store_sales A where exists(
+select 1 from store_sales B
+ where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and
A.sales_price < B.sales_price);
+
+select * from store_sales A where exists(
+select 1 from store_sales B
+ where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and
A.sales_price < B.sales_price);
diff --git
a/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out
b/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out
index fa9768cc5de..95055833efd 100644
---
a/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out
+++
b/ql/src/test/results/clientpositive/llap/subquery_complex_correlation_predicates.q.out
@@ -290,3 +290,69 @@ POSTHOOK: Input: default@author
POSTHOOK: Input: default@book
#### A masked pattern was here ####
Men Without Women
+PREHOOK: query: drop table if exists store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create table store_sales (promo_sk int, sales_price int,
list_price int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: create table store_sales (promo_sk int, sales_price int,
list_price int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: insert into store_sales values (1, 20, 15), (1, 15, 20), (1,
10, 15)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: insert into store_sales values (1, 20, 15), (1, 15, 20), (1,
10, 15)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store_sales.list_price SCRIPT []
+POSTHOOK: Lineage: store_sales.promo_sk SCRIPT []
+POSTHOOK: Lineage: store_sales.sales_price SCRIPT []
+PREHOOK: query: explain cbo
+select * from store_sales A where exists(
+select 1 from store_sales B
+ where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and
A.sales_price < B.sales_price)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select * from store_sales A where exists(
+select 1 from store_sales B
+ where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and
A.sales_price < B.sales_price)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSemiJoin(condition=[AND(=($3, $0), =($4, $1))], joinType=[semi])
+ HiveProject(promo_sk=[$0], sales_price=[$1], list_price=[$2])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[a])
+ HiveProject(promo_sk0=[$3], sales_price0=[$4])
+ HiveJoin(condition=[AND(=($3, $0), >($4, $2), <($4, $1))],
joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(promo_sk=[$0], sales_price=[$1], list_price=[$2])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2), IS NOT
NULL($1))])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[b])
+ HiveProject(promo_sk=[$0], sales_price=[$1])
+ HiveAggregate(group=[{0, 1}])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[a])
+
+PREHOOK: query: select * from store_sales A where exists(
+select 1 from store_sales B
+ where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and
A.sales_price < B.sales_price)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select * from store_sales A where exists(
+select 1 from store_sales B
+ where A.promo_sk = B.promo_sk and A.sales_price > B.list_price and
A.sales_price < B.sales_price)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####