hive git commit: HIVE-16851: Scalar subquery with group by missing sq_count_check UDF (Vineet Garg, reviewed by Ashutosh Chauhan)

vgarg Sun, 11 Jun 2017 11:52:11 -0700

Repository: hive
Updated Branches:
  refs/heads/master ca8ba1d13 -> ea5999724



HIVE-16851: Scalar subquery with group by missing sq_count_check UDF
(Vineet Garg, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea599972
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea599972
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea599972

Branch: refs/heads/master
Commit: ea5999724e183453954fe80a8ead84efb2d47b7e
Parents: ca8ba1d
Author: Vineet Garg <vg...@apache.com>
Authored: Sun Jun 11 11:50:47 2017 -0700
Committer: Vineet Garg <vg...@apache.com>
Committed: Sun Jun 11 11:50:47 2017 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/parse/QBSubQuery.java |   6 +-
 .../queries/clientpositive/subquery_scalar.q    |   5 +
 .../clientpositive/llap/subquery_scalar.q.out   | 319 +++++++++++++++++++
 3 files changed, 328 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ea599972/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index 0097a04..aee9903 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -594,9 +594,11 @@ public class QBSubQuery implements ISubQueryJoinInfo {
 
     // figure out if there is group by
     boolean noImplicityGby = true;
-    if ( insertClause.getChildCount() > 3 &&
-            insertClause.getChild(3).getType() == HiveParser.TOK_GROUPBY ) {
+    for(int i=0; i<insertClause.getChildCount(); i++) {
+      if(insertClause.getChild(i).getType() == HiveParser.TOK_GROUPBY) {
         noImplicityGby = false;
+        break;
+      }
     }
 
     /*

http://git-wip-us.apache.org/repos/asf/hive/blob/ea599972/ql/src/test/queries/clientpositive/subquery_scalar.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_scalar.q b/ql/src/test/queries/clientpositive/subquery_scalar.q
index 366d3d5..876a1e9 100644
--- a/ql/src/test/queries/clientpositive/subquery_scalar.q
+++ b/ql/src/test/queries/clientpositive/subquery_scalar.q
@@ -209,3 +209,8 @@ group by key, value
 having count(*) > (select count(*) from src s1 where s1.key > '9' )
 ;
 
+-- since subquery has implicit group by this should have sq_count_check (HIVE-16793)
+explain  select * from part where p_size > (select max(p_size) from part group by p_type);
+-- same as above, for correlated columns
+explain  select * from part where p_size > (select max(p_size) from part p where p.p_type = part.p_type group by p_type);
+

http://git-wip-us.apache.org/repos/asf/hive/blob/ea599972/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index b78df8b..e94edff 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -5687,3 +5687,322 @@ having count(*) > (select count(*) from src s1 where s1.key > '9' )
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain  select * from part where p_size > (select max(p_size) from part group by p_type)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain  select * from part where p_size > (select max(p_size) from part group by p_type)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 
(CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: p_partkey (type: int), p_name (type: string), 
p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size 
(type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                    Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: 
string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 2704 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: p_type (type: string)
+                    outputColumnNames: p_type
+                    Statistics: Num rows: 26 Data size: 2704 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: p_type (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 13 Data size: 1352 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 13 Data size: 1352 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 2808 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: p_type (type: string), p_size (type: int)
+                    outputColumnNames: p_type, p_size
+                    Statistics: Num rows: 26 Data size: 2808 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: max(p_size)
+                      keys: p_type (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 13 Data size: 1404 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 13 Data size: 1404 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 
+                  1 
+                  2 
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col10
+                Statistics: Num rows: 338 Data size: 210574 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Filter Operator
+                  predicate: (_col5 > _col10) (type: boolean)
+                  Statistics: Num rows: 112 Data size: 69776 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                    Statistics: Num rows: 112 Data size: 69328 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 112 Data size: 69328 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  Statistics: Num rows: 13 Data size: 1352 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Filter Operator
+                  predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 13 Data size: 52 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: int)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain  select * from part where p_size > (select max(p_size) from part p where p.p_type = part.p_type group by p_type)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain  select * from part where p_size > (select max(p_size) from part p where p.p_type = part.p_type group by p_type)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 6 
(SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: p_partkey (type: int), p_name (type: string), 
p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size 
(type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                    Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col4 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col4 (type: string)
+                      Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 
(type: string), _col7 (type: double), _col8 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: p
+                  Statistics: Num rows: 26 Data size: 2704 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: p_type is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 2704 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: p_type (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 13 Data size: 1352 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 13 Data size: 1352 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: p
+                  Statistics: Num rows: 26 Data size: 2808 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: p_type is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 2808 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: max(p_size)
+                      keys: p_type (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 13 Data size: 1404 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 13 Data size: 1404 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 _col4 (type: string)
+                  1 _col0 (type: string)
+                  2 _col1 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col11
+                residual filter predicates: {(_col5 > _col11)}
+                Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col2 
(type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), 
_col6 (type: string), _col7 (type: double), _col8 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                  Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 619 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 13 Data size: 1456 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (sq_count_check(_col1) <= 1) (type: boolean)
+                    Statistics: Num rows: 4 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 4 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 4 Data size: 448 Basic stats: 
COMPLETE Column stats: COMPLETE
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: int), _col0 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 13 Data size: 1404 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col1 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col1 (type: string)
+                    Statistics: Num rows: 13 Data size: 1404 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: int)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

hive git commit: HIVE-16851: Scalar subquery with group by missing sq_count_check UDF (Vineet Garg, reviewed by Ashutosh Chauhan)

Reply via email to