HIVE-16689: Correlated scalar subquery with comparison to constant in predicate fails(Vineet Garg reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6430064f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6430064f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6430064f Branch: refs/heads/hive-14535 Commit: 6430064f7e8b3cf33a8c69c368dfeddb55b635ca Parents: ea4807d Author: Vineet Garg <[email protected]> Authored: Tue May 23 11:00:27 2017 -0700 Committer: Vineet Garg <[email protected]> Committed: Tue May 23 11:00:27 2017 -0700 ---------------------------------------------------------------------- .../calcite/rules/HiveRelFieldTrimmer.java | 4 + .../queries/clientpositive/subquery_scalar.q | 4 + .../clientpositive/llap/subquery_scalar.q.out | 131 +++++++++++++++++++ 3 files changed, 139 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/6430064f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java index 2eb0f19..1801b83 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java @@ -313,4 +313,8 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer { } } } + + protected TrimResult result(RelNode r, final Mapping mapping) { + return new TrimResult(r, mapping); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/6430064f/ql/src/test/queries/clientpositive/subquery_scalar.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/subquery_scalar.q b/ql/src/test/queries/clientpositive/subquery_scalar.q index c7aeb11..366d3d5 100644 --- a/ql/src/test/queries/clientpositive/subquery_scalar.q +++ b/ql/src/test/queries/clientpositive/subquery_scalar.q @@ -67,6 +67,10 @@ select * from part where (p_partkey*p_size) <> (select min(p_partkey) from part) explain select count(*) as c from part as e where p_size + 100 < (select max(p_partkey) from part where p_name = e.p_name); select count(*) as c from part as e where p_size + 100 < (select max(p_partkey) from part where p_name = e.p_name); +-- corr, lhs contain constant expressions (HIVE-16689) +explain select count(*) as c from part as e where 100 < (select max(p_partkey) from part where p_name = e.p_name); +select count(*) as c from part as e where 100 < (select max(p_partkey) from part where p_name = e.p_name); + -- corr, equi-join predicate explain select * from part where p_size > (select avg(p_size) from part_null where part_null.p_type = part.p_type); http://git-wip-us.apache.org/repos/asf/hive/blob/6430064f/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 84b7071..174492c 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -1453,6 +1453,137 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 26 +PREHOOK: query: explain select count(*) as c from part as e where 100 < (select max(p_partkey) from part where p_name = e.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(*) as c from part as e where 100 < (select max(p_partkey) from part where p_name = e.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(p_partkey) + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (100 < CASE WHEN (_col2 is null) THEN (null) ELSE (_col1) END) (type: boolean) + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) as c from part as e where 100 < (select max(p_partkey) from part where p_name = e.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as c from part as e where 100 < (select max(p_partkey) from part where p_name = e.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 PREHOOK: query: explain select * from part where p_size > (select avg(p_size) from part_null where part_null.p_type = part.p_type) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size > (select avg(p_size) from part_null where part_null.p_type = part.p_type)
