[02/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)

jdere Tue, 12 Dec 2017 12:51:35 -0800

http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
index 60437ec..44f792a 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out
@@ -418,146 +418,10 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN 
bucket_small b ON a.key = b.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE 
Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 4 Data size: 452 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 4 Data size: 452 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: string)
-                          1 _col0 (type: string)
-                        Position of Big Table: 0
-            Local Work:
-              Map Reduce Local Work
-                Bucket Mapjoin Context:
-                    Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
-                    Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    bucket_count 4
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string 
value}
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 4
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, 
string value}
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-09
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    bucket_count 4
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string 
value}
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 4
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, 
string value}
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [$hdt$_1:b]
-              /bucket_small/ds=2008-04-09 [$hdt$_1:b]
-
   Stage: Stage-1
     Spark
       Edges:
@@ -578,14 +442,12 @@ STAGE PLANS:
                       expressions: key (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 27 Data size: 2750 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
-                        input vertices:
-                          1 Map 3
                         Position of Big Table: 0
                         Statistics: Num rows: 29 Data size: 3025 Basic stats: 
COMPLETE Column stats: NONE
                         BucketMapJoin: true
@@ -601,13 +463,6 @@ STAGE PLANS:
                             tag: -1
                             value expressions: _col0 (type: bigint)
                             auto parallelism: false
-            Local Work:
-              Map Reduce Local Work
-                Bucket Mapjoin Context:
-                    Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
-                    Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:


http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
index 16ecabe..73525bc 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out
@@ -539,146 +539,10 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN 
bucket_small b ON a.key = b.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE 
Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 4 Data size: 452 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 4 Data size: 452 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: string)
-                          1 _col0 (type: string)
-                        Position of Big Table: 0
-            Local Work:
-              Map Reduce Local Work
-                Bucket Mapjoin Context:
-                    Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
-                    Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    bucket_count 4
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string 
value}
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 4
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, 
string value}
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-09
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    bucket_count 4
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 4
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string 
value}
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 226
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 4
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, 
string value}
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [$hdt$_1:b]
-              /bucket_small/ds=2008-04-09 [$hdt$_1:b]
-
   Stage: Stage-1
     Spark
       Edges:
@@ -699,14 +563,12 @@ STAGE PLANS:
                       expressions: key (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 54 Data size: 5500 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
-                        input vertices:
-                          1 Map 3
                         Position of Big Table: 0
                         Statistics: Num rows: 59 Data size: 6050 Basic stats: 
COMPLETE Column stats: NONE
                         BucketMapJoin: true
@@ -722,13 +584,6 @@ STAGE PLANS:
                             tag: -1
                             value expressions: _col0 (type: bigint)
                             auto parallelism: false
-            Local Work:
-              Map Reduce Local Work
-                Bucket Mapjoin Context:
-                    Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
-                    Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:

http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
index e180471..abd6e65 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out
@@ -539,146 +539,10 @@ PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN 
bucket_small b ON a.key = b.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE 
Column stats: NONE
-                  GatherStats: false
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 228 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 228 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: string)
-                          1 _col0 (type: string)
-                        Position of Big Table: 0
-            Local Work:
-              Map Reduce Local Work
-                Bucket Mapjoin Context:
-                    Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
-                    Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
-            Path -> Alias:
-#### A masked pattern was here ####
-            Path -> Partition:
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-08
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-08
-                  properties:
-                    bucket_count 2
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string 
value}
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, 
string value}
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-#### A masked pattern was here ####
-                Partition
-                  base file name: ds=2008-04-09
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  partition values:
-                    ds 2008-04-09
-                  properties:
-                    bucket_count 2
-                    bucket_field_name key
-                    column.name.delimiter ,
-                    columns key,value
-                    columns.comments 
-                    columns.types string:string
-#### A masked pattern was here ####
-                    name default.bucket_small
-                    numFiles 2
-                    numRows 0
-                    partition_columns ds
-                    partition_columns.types string
-                    rawDataSize 0
-                    serialization.ddl struct bucket_small { string key, string 
value}
-                    serialization.format 1
-                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    totalSize 114
-#### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    properties:
-                      SORTBUCKETCOLSPREFIX TRUE
-                      bucket_count 2
-                      bucket_field_name key
-                      column.name.delimiter ,
-                      columns key,value
-                      columns.comments 
-                      columns.types string:string
-#### A masked pattern was here ####
-                      name default.bucket_small
-                      partition_columns ds
-                      partition_columns.types string
-                      serialization.ddl struct bucket_small { string key, 
string value}
-                      serialization.format 1
-                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-#### A masked pattern was here ####
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    name: default.bucket_small
-                  name: default.bucket_small
-            Truncated Path -> Alias:
-              /bucket_small/ds=2008-04-08 [$hdt$_1:b]
-              /bucket_small/ds=2008-04-09 [$hdt$_1:b]
-
   Stage: Stage-1
     Spark
       Edges:
@@ -699,14 +563,12 @@ STAGE PLANS:
                       expressions: key (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 116 Data size: 11624 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
-                        input vertices:
-                          1 Map 3
                         Position of Big Table: 0
                         Statistics: Num rows: 127 Data size: 12786 Basic 
stats: COMPLETE Column stats: NONE
                         BucketMapJoin: true
@@ -722,13 +584,6 @@ STAGE PLANS:
                             tag: -1
                             value expressions: _col0 (type: bigint)
                             auto parallelism: false
-            Local Work:
-              Map Reduce Local Work
-                Bucket Mapjoin Context:
-                    Alias Bucket File Name Mapping:
-#### A masked pattern was here ####
-                    Alias Bucket Output File Name Mapping:
-#### A masked pattern was here ####
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:

http://git-wip-us.apache.org/repos/asf/hive/blob/38405c14/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
index 4d0476f..033ee04 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out
@@ -387,18 +387,19 @@ join
 on src1.key = src2.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Spark
       Edges:
-        Reducer 4 <- Map 3 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 
(PARTITION-LEVEL SORT, 1)
+        Reducer 5 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 3 
+        Map 1 
             Map Operator Tree:
                 TableScan
                   alias: a
@@ -430,9 +431,7 @@ STAGE PLANS:
                             Map-reduce partition columns: _col0 (type: int)
                             Statistics: Num rows: 11 Data size: 77 Basic 
stats: COMPLETE Column stats: NONE
                             value expressions: _col1 (type: bigint)
-        Reducer 4 
-            Local Work:
-              Map Reduce Local Work
+        Reducer 2 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -440,52 +439,34 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
-                Spark HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Sorted Merge Bucket Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          keys: _col0 (type: int)
-                          mode: hash
-                          outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            key expressions: _col0 (type: int)
-                            sort order: +
-                            Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 11 Data size: 77 Basic 
stats: COMPLETE Column stats: NONE
-                            value expressions: _col1 (type: bigint)
-        Reducer 2 
-            Local Work:
-              Map Reduce Local Work
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col3 (type: bigint), _col1 
(type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 38 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -493,27 +474,12 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1, _col3
-                  input vertices:
-                    1 Reducer 4
-                  Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE 
Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col3 (type: bigint), 
_col1 (type: bigint)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 5 Data size: 38 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 5 Data size: 38 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -1018,16 +984,18 @@ select count(*) from
   on subq1.key = subq2.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL 
SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 3 
+        Map 1 
             Map Operator Tree:
                 TableScan
                   alias: a
@@ -1039,20 +1007,12 @@ STAGE PLANS:
                       expressions: (key + 1) (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: a
@@ -1064,28 +1024,31 @@ STAGE PLANS:
                       expressions: (key + 1) (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
-                        Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          mode: hash
-                          outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-                            value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE 
Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
@@ -1533,34 +1496,10 @@ select count(*) from (
 ) subq1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -1579,14 +1518,12 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -1597,8 +1534,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -1650,34 +1585,10 @@ select key, count(*) from
 group by key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -1696,15 +1607,13 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -1718,8 +1627,6 @@ STAGE PLANS:
                             Map-reduce partition columns: _col0 (type: int)
                             Statistics: Num rows: 11 Data size: 77 Basic 
stats: COMPLETE Column stats: NONE
                             value expressions: _col1 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -1787,34 +1694,10 @@ select count(*) from
 ) subq2
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -1834,15 +1717,13 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0
-                        input vertices:
-                          1 Map 4
                         Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           keys: _col0 (type: int)
@@ -1854,8 +1735,6 @@ STAGE PLANS:
                             sort order: +
                             Map-reduce partition columns: _col0 (type: int)
                             Statistics: Num rows: 11 Data size: 77 Basic 
stats: COMPLETE Column stats: NONE
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -1955,43 +1834,19 @@ join
 on src1.key = src2.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
-  Stage-2 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-3
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 6 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-2
+  Stage: Stage-1
     Spark
       Edges:
-        Reducer 5 <- Map 4 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 
(PARTITION-LEVEL SORT, 1)
+        Reducer 5 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 4 
+        Map 1 
             Map Operator Tree:
                 TableScan
                   alias: a
@@ -2003,15 +1858,13 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0
-                        input vertices:
-                          1 Map 6
                         Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -2025,11 +1878,7 @@ STAGE PLANS:
                             Map-reduce partition columns: _col0 (type: int)
                             Statistics: Num rows: 11 Data size: 77 Basic 
stats: COMPLETE Column stats: NONE
                             value expressions: _col1 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
-        Reducer 5 
-            Local Work:
-              Map Reduce Local Work
+        Reducer 2 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -2037,79 +1886,34 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
-                Spark HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-4
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
-  Stage: Stage-1
-    Spark
-      Edges:
-        Reducer 2 <- Map 1 (GROUP, 1)
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                        outputColumnNames: _col0
-                        input vertices:
-                          1 Map 3
-                        Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
-                        Group By Operator
-                          aggregations: count()
-                          keys: _col0 (type: int)
-                          mode: hash
-                          outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE
-                          Reduce Output Operator
-                            key expressions: _col0 (type: int)
-                            sort order: +
-                            Map-reduce partition columns: _col0 (type: int)
-                            Statistics: Num rows: 11 Data size: 77 Basic 
stats: COMPLETE Column stats: NONE
-                            value expressions: _col1 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
-        Reducer 2 
-            Local Work:
-              Map Reduce Local Work
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col3 (type: bigint), _col1 
(type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 38 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -2117,27 +1921,12 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-                  outputColumnNames: _col0, _col1, _col3
-                  input vertices:
-                    1 Reducer 5
-                  Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE 
Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col3 (type: bigint), 
_col1 (type: bigint)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 5 Data size: 38 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 5 Data size: 38 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -2204,34 +1993,10 @@ select count(*) from
   on subq1.key = subq2.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -2250,14 +2015,12 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -2268,8 +2031,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -2335,34 +2096,10 @@ select count(*) from
   on subq2.key = b.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -2381,14 +2118,12 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -2399,8 +2134,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -2490,34 +2223,10 @@ select count(*) from
   on subq2.key = subq4.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -2536,14 +2245,12 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -2554,8 +2261,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -2633,34 +2338,10 @@ select count(*) from
   on subq1.key = subq2.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 8) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -2679,14 +2360,12 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -2697,8 +2376,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -2750,34 +2427,10 @@ select count(*) from
     join tbl2 a on subq1.key = a.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -2796,14 +2449,12 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -2814,8 +2465,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -2865,34 +2514,10 @@ select count(*) from tbl1 a
   on a.key = subq1.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -2911,14 +2536,12 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -2929,8 +2552,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -2990,54 +2611,10 @@ select count(*) from
   on (subq1.key = subq3.key)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                          2 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: a
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                          2 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -3056,7 +2633,7 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                              Inner Join 0 to 2
@@ -3064,9 +2641,6 @@ STAGE PLANS:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                           2 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
-                          2 Map 4
                         Statistics: Num rows: 6 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -3077,8 +2651,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
@@ -3152,34 +2724,10 @@ join tbl2 b
 on subq2.key = b.key) a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
-                  Filter Operator
-                    predicate: (key < 6) (type: boolean)
-                    Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Spark HashTable Sink Operator
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-            Local Work:
-              Map Reduce Local Work
-
   Stage: Stage-1
     Spark
       Edges:
@@ -3198,14 +2746,12 @@ STAGE PLANS:
                       expressions: key (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 3 Data size: 21 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
+                      Sorted Merge Bucket Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        input vertices:
-                          1 Map 3
                         Statistics: Num rows: 3 Data size: 23 Basic stats: 
COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
@@ -3216,8 +2762,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
-            Local Work:
-              Map Reduce Local Work
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator

[02/12] hive git commit: HIVE-18208: SMB Join : Fix the unit tests to run SMB Joins. (Deepak Jaiswal, reviewed by Jason Dere)

Reply via email to