[04/15] hive git commit: HIVE-15905 : Inefficient plan for correlated subqueries (Vineet Garg via Ashutosh Chauhan)

hashutosh Tue, 14 Feb 2017 17:26:36 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index 0fde046..1ef17d9 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -132,9 +132,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 
(PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 
(PARTITION-LEVEL SORT, 2)
-        Reducer 6 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL 
SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -157,37 +155,22 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (key > '9') (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: ((value = value) and (key > '9')) (type: 
boolean)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string)
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: value (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string), _col1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: 
string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                          Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -205,42 +188,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 4 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col2
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string), _col1 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: 
string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                      Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 6 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -472,12 +419,10 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 
(PARTITION-LEVEL SORT, 2)
         Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2), Reducer 9 
(PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 2)
         Reducer 6 <- Reducer 5 (GROUP, 2)
-        Reducer 7 <- Reducer 6 (GROUP, 2)
-        Reducer 9 <- Map 8 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -500,31 +445,15 @@ STAGE PLANS:
                 TableScan
                   alias: part
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: p_mfgr (type: string), p_size (type: int)
-                    sort order: ++
-                    Map-reduce partition columns: p_mfgr (type: string)
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    TopN Hash Memory Usage: 0.1
-        Map 8 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_mfgr (type: string)
-                    outputColumnNames: p_mfgr
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: p_mfgr (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (p_mfgr = p_mfgr) (type: boolean)
+                    Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: p_mfgr (type: string), p_size (type: 
int)
+                      sort order: ++
+                      Map-reduce partition columns: p_mfgr (type: string)
+                      Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                      TopN Hash Memory Usage: 0.1
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -551,7 +480,7 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: int)
                 outputColumnNames: _col2, _col5
-                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition
@@ -572,92 +501,60 @@ STAGE PLANS:
                               window function: GenericUDAFRankEvaluator
                               window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                               isPivotResult: true
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (rank_window_0 <= 2) (type: boolean)
-                    Statistics: Num rows: 8 Data size: 968 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4 Data size: 484 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: _col2 (type: string), _col5 (type: int)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 8 Data size: 968 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 8 Data size: 968 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: int)
+                      Statistics: Num rows: 4 Data size: 484 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: min(_col1)
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 4 Data size: 484 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 4 Data size: 484 Basic stats: 
COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: int)
         Reducer 5 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2
-                Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col2 (type: string), _col1 (type: int)
-                  outputColumnNames: _col2, _col1
-                  Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: min(_col1)
-                    keys: _col2 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: int)
-        Reducer 6 
-            Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE 
Column stats: NONE
                 Group By Operator
                   keys: _col0 (type: string), _col1 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE 
Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string), _col1 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
-                    Statistics: Num rows: 7 Data size: 865 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 7 
+                    Statistics: Num rows: 2 Data size: 242 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 6 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: string), KEY._col1 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: int), _col0 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE 
Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col1 (type: string), _col0 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col1 (type: string), _col0 
(type: int)
-                    Statistics: Num rows: 3 Data size: 370 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 9 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 121 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -715,10 +612,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 
(PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Reducer 4 (GROUP, 2)
-        Reducer 7 <- Map 6 (GROUP, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 
(PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Map 3 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -741,37 +636,18 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (key > '9') (type: boolean)
-                    Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string), value (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 166 Data size: 1763 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string)
-        Map 6 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: value (type: string)
-                    outputColumnNames: value
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: ((value = value) and (key > '9')) (type: 
boolean)
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      keys: value (type: string)
+                      keys: key (type: string), value (type: string)
                       mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -791,53 +667,21 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 4 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col2
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string), _col2 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: string)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                    Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 5 
-            Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 137 Data size: 1455 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE 
Column stats: NONE
                 Group By Operator
                   keys: _col0 (type: string), _col1 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 137 Data size: 1455 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 41 Data size: 435 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string), _col1 (type: string)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
-                    Statistics: Num rows: 137 Data size: 1455 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 7 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 41 Data size: 435 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -1075,7 +919,6 @@ POSTHOOK: Input: default@lineitem
 #### A masked pattern was here ####
 108570 8571
 4297   1798
-Warning: Shuffle Join JOIN[14][tables = [$hdt$_3, $hdt$_4]] in Work 'Reducer 
11' is a cross product
 PREHOOK: query: explain select sum(l_extendedprice) from lineitem, part where 
p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem 
where l_partkey = p_partkey)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select sum(l_extendedprice) from lineitem, part where 
p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem 
where l_partkey = p_partkey)
@@ -1088,14 +931,11 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 13 
(PARTITION-LEVEL SORT, 1)
-        Reducer 12 <- Reducer 11 (GROUP, 2)
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL 
SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 9 
(PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 
(PARTITION-LEVEL SORT, 2)
         Reducer 4 <- Reducer 3 (GROUP, 1)
-        Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 12 
(PARTITION-LEVEL SORT, 2)
+        Reducer 7 <- Map 6 (GROUP, 2)
         Reducer 8 <- Reducer 7 (GROUP, 2)
-        Reducer 9 <- Reducer 8 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1116,29 +956,6 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 100 Data size: 11999 Basic 
stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: double), _col2 (type: 
double)
-        Map 10 
-            Map Operator Tree:
-                TableScan
-                  alias: lineitem
-                  Statistics: Num rows: 100 Data size: 11999 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    Statistics: Num rows: 100 Data size: 400 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 100 Data size: 400 Basic stats: 
COMPLETE Column stats: COMPLETE
-        Map 13 
-            Map Operator Tree:
-                TableScan
-                  alias: part
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_partkey (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -1161,49 +978,22 @@ STAGE PLANS:
                 TableScan
                   alias: lineitem
                   Statistics: Num rows: 100 Data size: 11999 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: l_partkey (type: int), l_quantity (type: 
double)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 100 Data size: 11999 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 100 Data size: 11999 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double)
-        Reducer 11 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col1
-                Statistics: Num rows: 2600 Data size: 327700 Basic stats: 
COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col1 (type: int)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2600 Data size: 327700 Basic stats: 
COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: int)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2600 Data size: 327700 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 12 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1300 Data size: 163850 Basic stats: 
COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 1300 Data size: 163850 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 2 
+                  Filter Operator
+                    predicate: (l_partkey = l_partkey) (type: boolean)
+                    Statistics: Num rows: 50 Data size: 5999 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: avg(l_quantity)
+                      keys: l_partkey (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 50 Data size: 5999 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 50 Data size: 5999 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: 
struct<count:bigint,sum:double,input:double>)
+        Reducer 2 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -1228,7 +1018,7 @@ STAGE PLANS:
                   0 _col1 (type: double), _col3 (type: int)
                   1 _col0 (type: double), _col1 (type: int)
                 outputColumnNames: _col2
-                Statistics: Num rows: 392 Data size: 49494 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 121 Data size: 14517 Basic stats: 
COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: sum(_col2)
                   mode: hash
@@ -1254,60 +1044,34 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 7 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col1, _col2
-                Statistics: Num rows: 1430 Data size: 180235 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col2 (type: int), _col1 (type: double)
-                  outputColumnNames: _col2, _col1
-                  Statistics: Num rows: 1430 Data size: 180235 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: avg(_col1)
-                    keys: _col2 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1430 Data size: 180235 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 1430 Data size: 180235 Basic 
stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: 
struct<count:bigint,sum:double,input:double>)
-        Reducer 8 
-            Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0)
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 715 Data size: 90117 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE 
Column stats: NONE
                 Group By Operator
                   keys: _col1 (type: double), _col0 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 715 Data size: 90117 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 2999 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: double), _col1 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: double), _col1 
(type: int)
-                    Statistics: Num rows: 715 Data size: 90117 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 9 
+                    Statistics: Num rows: 25 Data size: 2999 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 8 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: double), KEY._col1 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 357 Data size: 44995 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: double), _col1 (type: int)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: double), _col1 
(type: int)
-                  Statistics: Num rows: 357 Data size: 44995 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12 Data size: 1439 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -1315,7 +1079,6 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[14][tables = [$hdt$_3, $hdt$_4]] in Work 'Reducer 
11' is a cross product
 PREHOOK: query: select sum(l_extendedprice) from lineitem, part where 
p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem 
where l_partkey = p_partkey)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@lineitem
@@ -1339,10 +1102,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 
(PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Reducer 4 (GROUP, 2)
-        Reducer 7 <- Map 6 (GROUP, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 
(PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1368,43 +1129,27 @@ STAGE PLANS:
                 TableScan
                   alias: p
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_type (type: string), p_size (type: int)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: int)
-        Map 6 
-            Map Operator Tree:
-                TableScan
-                  alias: part
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_type (type: string)
-                    outputColumnNames: p_type
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (p_type = p_type) (type: boolean)
+                    Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      keys: p_type (type: string)
+                      keys: p_type (type: string), p_size (type: int)
                       mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                        key expressions: _col0 (type: string), _col1 (type: 
int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
+                        Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col4 (type: string), _col5 (type: int)
-                  1 _col0 (type: string), _col1 (type: int)
+                  1 _col1 (type: string), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
                 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
@@ -1416,57 +1161,25 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 4 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2
-                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col2 (type: string), _col1 (type: int)
-                  outputColumnNames: _col2, _col1
-                  Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col2 (type: string), _col1 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: int)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
-                      Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 5 
-            Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: string), KEY._col1 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE 
Column stats: NONE
                 Group By Operator
                   keys: _col0 (type: string), _col1 (type: int)
-                  mode: hash
+                  mode: complete
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: int)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
-                    Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 7 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: _col1 (type: int), _col0 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 363 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: string), _col0 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col1 (type: string), 
_col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 363 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -1779,11 +1492,9 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 
(PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 
(PARTITION-LEVEL SORT, 2)
         Reducer 3 <- Reducer 2 (GROUP, 1)
-        Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 8 
(PARTITION-LEVEL SORT, 2)
-        Reducer 6 <- Reducer 5 (GROUP, 2)
-        Reducer 8 <- Map 7 (GROUP, 2)
+        Reducer 5 <- Map 4 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1805,35 +1516,23 @@ STAGE PLANS:
                 TableScan
                   alias: part
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_partkey (type: int), p_name (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col1 (type: string)
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int)
-        Map 7 
-            Map Operator Tree:
-                TableScan
-                  alias: e
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_name (type: string)
-                    outputColumnNames: p_name
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: p_name (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (p_name = p_name) (type: boolean)
+                    Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: p_name (type: string), p_partkey (type: int)
+                      outputColumnNames: p_name, p_partkey
+                      Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: p_name (type: string), p_partkey (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: 
int)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
+                          Statistics: Num rows: 13 Data size: 1573 Basic 
stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -1868,56 +1567,20 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 5 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0, _col2
-                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col2 (type: string), _col0 (type: int)
-                  outputColumnNames: _col2, _col0
-                  Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col2 (type: string), _col0 (type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: int)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
-                      Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 6 
-            Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: string), KEY._col1 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: int), _col0 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE 
Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col1 (type: string), _col0 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col1 (type: string), _col0 
(type: int)
-                    Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 8 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 726 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -2100,10 +1763,9 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 
(PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Reducer 4 (GROUP, 2)
-        Reducer 7 <- Map 6 (GROUP, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 
(PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 
(PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 5 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2116,9 +1778,9 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
                     Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col1 (type: string), _col5 (type: 
int), _col5 (type: int)
-                      sort order: +++
-                      Map-reduce partition columns: _col1 (type: string), 
_col5 (type: int), _col5 (type: int)
+                      key expressions: _col1 (type: string), _col5 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col1 (type: string), 
_col5 (type: int)
                       Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col0 (type: int), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col6 (type: string), 
_col7 (type: double), _col8 (type: string)
         Map 3 
@@ -2136,7 +1798,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col2 (type: int), _col0 
(type: int)
                       Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
-        Map 6 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: part
@@ -2159,15 +1821,15 @@ STAGE PLANS:
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Inner Join 0 to 1
+                     Left Semi Join 0 to 1
                 keys:
-                  0 _col1 (type: string), _col5 (type: int), _col5 (type: int)
-                  1 _col0 (type: string), _col2 (type: int), _col1 (type: int)
+                  0 _col1 (type: string), _col5 (type: int)
+                  1 _col0 (type: string), _col1 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
-                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 30 Data size: 3807 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2182,33 +1844,21 @@ STAGE PLANS:
                   1 _col0 (type: int), (_col0 + 121150) (type: int)
                 outputColumnNames: _col1, _col3
                 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
-                Group By Operator
-                  keys: _col1 (type: string), _col3 (type: int)
-                  mode: hash
+                Select Operator
+                  expressions: _col1 (type: string), _col3 (type: int)
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: int)
-                    sort order: ++
-                    Map-reduce partition columns: _col0 (type: string), _col1 
(type: int)
+                  Group By Operator
+                    keys: _col0 (type: string), _col1 (type: int)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 5 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string), KEY._col1 (type: int)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: int), _col1 
(type: int)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string), _col2 (type: int), 
_col1 (type: int)
-                    sort order: +++
-                    Map-reduce partition columns: _col0 (type: string), _col2 
(type: int), _col1 (type: int)
-                    Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 7 
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: int)
+                      Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 6 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int)
@@ -2250,8 +1900,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 
(PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 
(PARTITION-LEVEL SORT, 2)
-        Reducer 6 <- Map 5 (GROUP, 2)
+        Reducer 4 <- Map 3 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2274,88 +1923,52 @@ STAGE PLANS:
                 TableScan
                   alias: p
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_partkey (type: int), p_name (type: string), 
p_size (type: int)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int), _col2 (type: int)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: int), _col2 
(type: int)
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: string)
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: part
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_partkey (type: int), p_size (type: int)
-                    outputColumnNames: p_partkey, p_size
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((p_size = p_size) and (p_partkey = p_partkey)) 
(type: boolean)
+                    Statistics: Num rows: 6 Data size: 726 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
-                      keys: p_partkey (type: int), p_size (type: int)
+                      keys: p_partkey (type: int), p_name (type: string), 
p_size (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 6 Data size: 726 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int), _col1 (type: int)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
-                        Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                        key expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: string), _col2 (type: int)
+                        Statistics: Num rows: 6 Data size: 726 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col0 (type: int), _col1 (type: string), _col5 (type: int)
-                  1 _col0 (type: int), _col1 (type: string), _col2 (type: int)
+                  1 _col1 (type: int), _col0 (type: string), _col2 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
-                Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 30 Data size: 3807 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 4 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: int), _col2 (type: int)
-                  1 _col0 (type: int), _col1 (type: int)
-                outputColumnNames: _col1, _col3, _col4
-                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col3 (type: int), _col1 (type: string), _col4 
(type: int)
-                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: int), _col1 (type: string), _col2 
(type: int)
-                    mode: hash
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: int)
-                      sort order: +++
-                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: string), _col2 (type: int)
-                      Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 6 
-            Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                keys: KEY._col0 (type: int), KEY._col1 (type: string), 
KEY._col2 (type: int)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: int), _col1 (type: int)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: string), _col0 (type: int), _col2 
(type: int)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col1 (type: int), _col0 (type: string), 
_col2 (type: int)
+                    sort order: +++
+                    Map-reduce partition columns: _col1 (type: int), _col0 
(type: string), _col2 (type: int)
+                    Statistics: Num rows: 3 Data size: 363 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -2410,8 +2023,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 
(PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 
(PARTITION-LEVEL SORT, 2)
-        Reducer 6 <- Map 5 (GROUP, 2)
+        Reducer 4 <- Map 3 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2434,88 +2046,52 @@ STAGE PLANS:
                 TableScan
                   alias: part
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_brand (type: string), p_type (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: string)
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: part
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_brand (type: string)
-                    outputColumnNames: p_brand
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: p_brand (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (p_brand = p_brand) (type: boolean)
+                    Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: p_type (type: string), p_brand (type: 
string)
+                      outputColumnNames: p_type, p_brand
+                      Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: p_type (type: string), p_brand (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: 
string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                          Statistics: Num rows: 13 Data size: 1573 Basic 
stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col1 (type: string), _col2 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0
-                Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 30 Data size: 3807 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 4 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2
-                Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: string), _col2 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string), _col1 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: 
string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                      Statistics: Num rows: 28 Data size: 3461 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 6 
-            Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: string)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                  Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE 
Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -2949,13 +2525,11 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 11 <- Map 10 (GROUP, 2)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 
(PARTITION-LEVEL SORT, 2)
         Reducer 3 <- Reducer 2 (GROUP, 2)
-        Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 3 
(PARTITION-LEVEL SORT, 2)
-        Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 9 
(PARTITION-LEVEL SORT, 2)
-        Reducer 7 <- Reducer 6 (GROUP, 2)
-        Reducer 9 <- Map 8 (GROUP, 2)
+        Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 8 
(PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 5 (GROUP, 2)
+        Reducer 8 <- Map 7 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -2972,7 +2546,25 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-        Map 10 
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: sc
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (key = key) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: key (type: string), value (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: s1
@@ -2994,69 +2586,6 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: bigint)
-        Map 5 
-            Map Operator Tree:
-                TableScan
-                  alias: sc
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: string)
-        Map 8 
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: string)
-                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: key (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 11 
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col1 (type: bigint)
-                  outputColumnNames: _col1
-                  Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: _col1 is not null (type: boolean)
-                    Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col1 (type: bigint)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: bigint)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: bigint)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: bigint)
-                          Statistics: Num rows: 125 Data size: 1328 Basic 
stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -3115,56 +2644,49 @@ STAGE PLANS:
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 6 
             Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col2 (type: string), _col1 (type: string)
-                  outputColumnNames: _col2, _col1
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col2 (type: string), _col1 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: 
string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                      Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 7 
-            Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col1 (type: string), _col0 (type: string)
                     sort order: ++
                     Map-reduce partition columns: _col1 (type: string), _col0 
(type: string)
-                    Statistics: Num rows: 275 Data size: 2921 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 9 
+                    Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 8 
             Reduce Operator Tree:
               Group By Operator
+                aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: bigint)
+                  outputColumnNames: _col1
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: _col1 is not null (type: boolean)
+                    Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col1 (type: bigint)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: bigint)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 125 Data size: 1328 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: bigint)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: bigint)
+                          Statistics: Num rows: 125 Data size: 1328 Basic 
stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -3628,10 +3150,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 
(PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL 
SORT, 2)
         Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
-        Reducer 7 <- Map 6 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -3655,37 +3175,22 @@ STAGE PLANS:
                   alias: part
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (p_size < 10) (type: boolean)
-                    Statistics: Num rows: 8 Data size: 968 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: ((p_size < 10) and (p_mfgr = p_mfgr)) (type: 
boolean)
+                    Statistics: Num rows: 4 Data size: 484 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: p_mfgr (type: string), p_name (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 8 Data size: 968 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 8 Data size: 968 Basic stats: 
COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
-        Map 6 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: p_mfgr (type: string)
-                    outputColumnNames: p_mfgr
-                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: p_mfgr (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4 Data size: 484 Basic stats: 
COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string), _col1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 4 Data size: 484 Basic stats: 
COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: 
string)
+                          sort order: ++
+                          Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
+                          Statistics: Num rows: 4 Data size: 484 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Join Operator
@@ -3718,42 +3223,6 @@ STAGE PLANS:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1, _col2
-                Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col2 (type: string), _col1 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string), _col1 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: 
string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                      Statistics: Num rows: 14 Data size: 1730 Basic stats: 
COMPLETE Column stats: NONE
-        Reducer 7 
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -3791,10 +3260,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 
(PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 
(PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 4 
(PARTITION-LEVEL SORT, 2)
-        Reducer 7 <- Map 6 (GROUP, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 
(PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL 
SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -3815,58 +3282,39 @@ STAGE PLANS:
         Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: pp
+                  alias: p
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: p_type is not null (type: boolean)
                     Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: p_type (type: string), p_size (type: int)
+                      expressions: p_name (type: string), p_type (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col1 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col1 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE


<TRUNCATED>

[04/15] hive git commit: HIVE-15905 : Inefficient plan for correlated subqueries (Vineet Garg via Ashutosh Chauhan)

Reply via email to