Repository: hive Updated Branches: refs/heads/master ca8ba1d13 -> ea5999724
HIVE-16851: Scalar subquery with group by missing sq_count_check UDF (Vineet Garg, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea599972 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea599972 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea599972 Branch: refs/heads/master Commit: ea5999724e183453954fe80a8ead84efb2d47b7e Parents: ca8ba1d Author: Vineet Garg <vg...@apache.com> Authored: Sun Jun 11 11:50:47 2017 -0700 Committer: Vineet Garg <vg...@apache.com> Committed: Sun Jun 11 11:50:47 2017 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/parse/QBSubQuery.java | 6 +- .../queries/clientpositive/subquery_scalar.q | 5 + .../clientpositive/llap/subquery_scalar.q.out | 319 +++++++++++++++++++ 3 files changed, 328 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ea599972/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index 0097a04..aee9903 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -594,9 +594,11 @@ public class QBSubQuery implements ISubQueryJoinInfo { // figure out if there is group by boolean noImplicityGby = true; - if ( insertClause.getChildCount() > 3 && - insertClause.getChild(3).getType() == HiveParser.TOK_GROUPBY ) { + for(int i=0; i<insertClause.getChildCount(); i++) { + if(insertClause.getChild(i).getType() == HiveParser.TOK_GROUPBY) { noImplicityGby = false; + break; + } } /* http://git-wip-us.apache.org/repos/asf/hive/blob/ea599972/ql/src/test/queries/clientpositive/subquery_scalar.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/subquery_scalar.q b/ql/src/test/queries/clientpositive/subquery_scalar.q index 366d3d5..876a1e9 100644 --- a/ql/src/test/queries/clientpositive/subquery_scalar.q +++ b/ql/src/test/queries/clientpositive/subquery_scalar.q @@ -209,3 +209,8 @@ group by key, value having count(*) > (select count(*) from src s1 where s1.key > '9' ) ; +-- since subquery has implicit group by this should have sq_count_check (HIVE-16793) +explain select * from part where p_size > (select max(p_size) from part group by p_type); +-- same as above, for correlated columns +explain select * from part where p_size > (select max(p_size) from part p where p.p_type = part.p_type group by p_type); + http://git-wip-us.apache.org/repos/asf/hive/blob/ea599972/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index b78df8b..e94edff 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -5687,3 +5687,322 @@ having count(*) > (select count(*) from src s1 where s1.key > '9' ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain select * from part where p_size > (select max(p_size) from part group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size > (select max(p_size) from part group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string) + outputColumnNames: p_type + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: p_type, p_size + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 338 Data size: 210574 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col5 > _col10) (type: boolean) + Statistics: Num rows: 112 Data size: 69776 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 112 Data size: 69328 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 112 Data size: 69328 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from part where p_size > (select max(p_size) from part p where p.p_type = part.p_type group by p_type) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size > (select max(p_size) from part p where p.p_type = part.p_type group by p_type) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(p_size) + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col4 (type: string) + 1 _col0 (type: string) + 2 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11 + residual filter predicates: {(_col5 > _col11)} + Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +