[3/5] hive git commit: HIVE-17529: Bucket Map Join : Sets incorrect edge type causing execution failure (Deepak Jaiswal, reviewed by Jason Dere)

jdere Thu, 21 Sep 2017 13:25:21 -0700
http://git-wip-us.apache.org/repos/asf/hive/blob/8cdee629/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out 
b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
index 57c792f..e2cee7f 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
@@ -108,6 +108,62 @@ POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
 POSTHOOK: Output: default@tab@ds=2008-04-08
 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE 
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, 
comment:null), ]
 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE 
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, 
comment:null), ]
+PREHOOK: query: analyze table srcbucket_mapjoin compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcbucket_mapjoin compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcbucket_mapjoin_part compute statistics for 
columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@srcbucket_mapjoin_part
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcbucket_mapjoin_part compute statistics for 
columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: analyze table tab compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Output: default@tab
+PREHOOK: Output: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Output: default@tab
+POSTHOOK: Output: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: analyze table tab_part compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part
+PREHOOK: Output: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab_part compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part
+POSTHOOK: Output: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
 PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on 
a.key = c.key join tab_part b on a.value = b.value
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c 
on a.key = c.key join tab_part b on a.value = b.value
@@ -121,94 +177,216 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 2 <- Map 1 (CUSTOM_EDGE)
-        Map 3 <- Map 2 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
+                  alias: a
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: 
boolean)
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 500 Data size: 47500 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+                  Filter Operator
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 47500 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
                   alias: c
-                  Statistics: Num rows: 500 Data size: 1904 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 1808 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 475 Data size: 1808 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 475 Data size: 1808 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 2 
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 809 Data size: 76855 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: string)
+                  Statistics: Num rows: 809 Data size: 76855 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: int)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col3
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col3 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1309 Data size: 10472 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1309 Data size: 10472 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on 
a.key = c.key join tab_part b on a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c 
on a.key = c.key join tab_part b on a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (CUSTOM_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 500 Data size: 89488 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (key is not null and value is not null) (type: 
boolean)
-                    Statistics: Num rows: 450 Data size: 80539 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 450 Data size: 80539 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        outputColumnNames: _col1, _col2
+                        outputColumnNames: _col0, _col1
                         input vertices:
-                          0 Map 1
-                        Statistics: Num rows: 522 Data size: 1988 Basic stats: 
COMPLETE Column stats: NONE
+                          1 Map 3
+                        Statistics: Num rows: 809 Data size: 76855 Basic 
stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
-                          key expressions: _col2 (type: string)
+                          key expressions: _col1 (type: string)
                           sort order: +
-                          Map-reduce partition columns: _col2 (type: string)
-                          Statistics: Num rows: 522 Data size: 1988 Basic 
stats: COMPLETE Column stats: NONE
-                          value expressions: _col1 (type: int)
+                          Map-reduce partition columns: _col1 (type: string)
+                          Statistics: Num rows: 809 Data size: 76855 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
         Map 3 
             Map Operator Tree:
                 TableScan
+                  alias: c
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
                   alias: b
-                  Statistics: Num rows: 500 Data size: 89488 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 475 Data size: 85013 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 475 Data size: 85013 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col2 (type: string)
-                          1 _col1 (type: string)
-                        outputColumnNames: _col1, _col3
-                        input vertices:
-                          0 Map 2
-                        Statistics: Num rows: 574 Data size: 2186 Basic stats: 
COMPLETE Column stats: NONE
-                        Select Operator
-                          expressions: _col1 (type: int), _col3 (type: int)
-                          outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 574 Data size: 2186 Basic 
stats: COMPLETE Column stats: NONE
-                          File Output Operator
-                            compressed: false
-                            Statistics: Num rows: 574 Data size: 2186 Basic 
stats: COMPLETE Column stats: NONE
-                            table:
-                                input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                                serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 47500 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col3
+                Statistics: Num rows: 1309 Data size: 10472 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col3 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1309 Data size: 10472 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1309 Data size: 10472 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -238,6 +416,16 @@ POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
 POSTHOOK: Output: default@tab1
 POSTHOOK: Lineage: tab1.key SIMPLE 
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, 
comment:null), ]
 POSTHOOK: Lineage: tab1.value SIMPLE 
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, 
comment:null), ]
+PREHOOK: query: analyze table tab1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Output: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Output: default@tab1
+#### A masked pattern was here ####
 PREHOOK: query: explain
 select a.key, a.value, b.value
 from tab1 a join src b on a.key = b.key
@@ -262,19 +450,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 242 Data size: 43428 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 230 Data size: 41274 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 230 Data size: 41274 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: UDFToDouble(_col0) (type: double)
                         sort order: +
                         Map-reduce partition columns: UDFToDouble(_col0) 
(type: double)
-                        Statistics: Num rows: 230 Data size: 41274 Basic 
stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 22990 Basic 
stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col1 (type: 
string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -308,14 +496,14 @@ STAGE PLANS:
                   0 UDFToDouble(_col0) (type: double)
                   1 UDFToDouble(_col0) (type: double)
                 outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 550 Data size: 97900 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 391 Data size: 72726 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: string), _col3 
(type: string)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 550 Data size: 97900 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 391 Data size: 72726 Basic stats: 
COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 550 Data size: 97900 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 391 Data size: 72726 Basic stats: 
COMPLETE Column stats: COMPLETE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -328,10 +516,12 @@ STAGE PLANS:
         ListSink
 
 PREHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a join 
(select key from tab_part where key > 2) b on a.key = b.key
+select a.key, a.value, b.value
+from tab1 a join src b on a.key = b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a join 
(select key from tab_part where key > 2) b on a.key = b.key
+select a.key, a.value, b.value
+from tab1 a join src b on a.key = b.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -342,59 +532,71 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 2 (CUSTOM_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: tab_part
-                  Statistics: Num rows: 500 Data size: 7216 Basic stats: 
COMPLETE Column stats: NONE
+                  alias: a
+                  Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key > 2) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                        outputColumnNames: _col0, _col1
-                        input vertices:
-                          1 Map 2
-                        Statistics: Num rows: 1 Data size: 15 Basic stats: 
COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 1 Data size: 15 Basic stats: 
COMPLETE Column stats: NONE
-                          table:
-                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: UDFToDouble(_col0) (type: double)
+                        sort order: +
+                        Map-reduce partition columns: UDFToDouble(_col0) 
(type: double)
+                        Statistics: Num rows: 242 Data size: 22990 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int), _col1 (type: 
string)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 2 
+        Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: tab_part
-                  Statistics: Num rows: 500 Data size: 7216 Basic stats: 
COMPLETE Column stats: NONE
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key > 2) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: UDFToDouble(_col0) (type: double)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                        Map-reduce partition columns: UDFToDouble(_col0) 
(type: double)
+                        Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 UDFToDouble(_col0) (type: double)
+                  1 UDFToDouble(_col0) (type: double)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 391 Data size: 72726 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col3 
(type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 391 Data size: 72726 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 391 Data size: 72726 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -403,10 +605,10 @@ STAGE PLANS:
         ListSink
 
 PREHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a left outer 
join (select key from tab_part where key > 2) b on a.key = b.key
+select a.key, b.key from (select key from tab_part where key > 1) a join 
(select key from tab_part where key > 2) b on a.key = b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a left outer 
join (select key from tab_part where key > 2) b on a.key = b.key
+select a.key, b.key from (select key from tab_part where key > 1) a join 
(select key from tab_part where key > 2) b on a.key = b.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -417,59 +619,536 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Map 2 (CUSTOM_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: tab_part
-                  Statistics: Num rows: 500 Data size: 7216 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key > 1) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
-                      Map Join Operator
-                        condition map:
-                             Left Outer Join 0 to 1
-                        keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                        outputColumnNames: _col0, _col1
-                        input vertices:
-                          1 Map 2
-                        Statistics: Num rows: 1 Data size: 15 Basic stats: 
COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 1 Data size: 15 Basic stats: 
COMPLETE Column stats: NONE
-                          table:
-                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            Execution mode: llap
-            LLAP IO: no inputs
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a join 
(select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a join 
(select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (CUSTOM_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 267 Data size: 2136 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
         Map 2 
             Map Operator Tree:
                 TableScan
-                  alias: tab_part
-                  Statistics: Num rows: 500 Data size: 7216 Basic stats: 
COMPLETE Column stats: NONE
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a left outer 
join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a left outer 
join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 1) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a left outer 
join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a left outer 
join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (CUSTOM_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 1) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Left Outer Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 267 Data size: 2136 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a right 
outer join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a right 
outer join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Right Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a right 
outer join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a right 
outer join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 2 <- Map 1 (CUSTOM_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key > 2) (type: boolean)
+                    Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 166 Data size: 664 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Right Outer Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 267 Data size: 2136 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 267 Data size: 2136 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select a.key, b.key from (select distinct key from 
tab) a join tab b on b.key = a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key, b.key from (select distinct key from 
tab) a join tab b on b.key = a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tab
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key > 2) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 121 Data size: 484 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 121 Data size: 484 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 121 Data size: 484 Basic stats: 
COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 191 Data size: 1528 Basic stats: 
COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 191 Data size: 1528 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -477,11 +1156,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a right 
outer join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: query: explain select a.key, b.key from (select distinct key from 
tab) a join tab b on b.key = a.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a right 
outer join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: query: explain select a.key, b.key from (select distinct key from 
tab) a join tab b on b.key = a.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -492,59 +1169,74 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 2 <- Map 1 (CUSTOM_EDGE)
+        Map 3 <- Reducer 2 (CUSTOM_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: tab_part
-                  Statistics: Num rows: 500 Data size: 7216 Basic stats: 
COMPLETE Column stats: NONE
+                  alias: tab
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key > 2) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: int)
+                      mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 121 Data size: 484 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                        Statistics: Num rows: 121 Data size: 484 Basic stats: 
COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 2 
+        Map 3 
             Map Operator Tree:
                 TableScan
-                  alias: tab_part
-                  Statistics: Num rows: 500 Data size: 7216 Basic stats: 
COMPLETE Column stats: NONE
+                  alias: b
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (key > 2) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 14 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
-                             Right Outer Join 0 to 1
+                             Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                         outputColumnNames: _col0, _col1
                         input vertices:
-                          0 Map 1
-                        Statistics: Num rows: 1 Data size: 15 Basic stats: 
COMPLETE Column stats: NONE
+                          0 Reducer 2
+                        Statistics: Num rows: 191 Data size: 1528 Basic stats: 
COMPLETE Column stats: COMPLETE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 1 Data size: 15 Basic stats: 
COMPLETE Column stats: NONE
+                          Statistics: Num rows: 191 Data size: 1528 Basic 
stats: COMPLETE Column stats: COMPLETE
                           table:
                               input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                               serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 121 Data size: 484 Basic stats: 
COMPLETE Column stats: COMPLETE
 
   Stage: Stage-0
     Fetch Operator
@@ -552,9 +1244,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: explain select a.key, b.key from (select distinct key from 
tab) a join tab b on b.key = a.key
+PREHOOK: query: explain select a.value, b.value from (select distinct value 
from tab) a join tab b on b.key = a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key, b.key from (select distinct key from 
tab) a join tab b on b.key = a.key
+POSTHOOK: query: explain select a.value, b.value from (select distinct value 
from tab) a join tab b on b.key = a.value
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -565,69 +1257,82 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: tab
-                  Statistics: Num rows: 242 Data size: 3490 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 22022 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 230 Data size: 3316 Basic stats: 
COMPLETE Column stats: NONE
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 242 Data size: 22022 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Group By Operator
-                      keys: key (type: int)
+                      keys: value (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 230 Data size: 3316 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 121 Data size: 11011 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col0 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 230 Data size: 3316 Basic stats: 
COMPLETE Column stats: NONE
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 121 Data size: 11011 Basic 
stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 3 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 242 Data size: 3490 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 230 Data size: 3316 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 230 Data size: 3316 Basic stats: 
COMPLETE Column stats: NONE
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: UDFToDouble(_col0) (type: double)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 230 Data size: 3316 Basic stats: 
COMPLETE Column stats: NONE
+                        Map-reduce partition columns: UDFToDouble(_col0) 
(type: double)
+                        Statistics: Num rows: 242 Data size: 22990 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: int)
+                keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 115 Data size: 1658 Basic stats: 
COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
+                Statistics: Num rows: 121 Data size: 11011 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: UDFToDouble(_col0) (type: double)
+                  sort order: +
+                  Map-reduce partition columns: UDFToDouble(_col0) (type: 
double)
+                  Statistics: Num rows: 121 Data size: 11011 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: string)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 UDFToDouble(_col0) (type: double)
+                  1 UDFToDouble(_col0) (type: double)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 191 Data size: 34762 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: string)
                   outputColumnNames: _col0, _col1
-                  input vertices:
-                    1 Map 3
-                  Statistics: Num rows: 253 Data size: 3647 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 191 Data size: 34762 Basic stats: 
COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 253 Data size: 3647 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 191 Data size: 34762 Basic stats: 
COMPLETE Column stats: COMPLETE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -660,39 +1365,39 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: tab
-                  Statistics: Num rows: 242 Data size: 45070 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 22022 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 230 Data size: 42835 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 22022 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: value (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 230 Data size: 42835 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 121 Data size: 11011 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 230 Data size: 42835 Basic 
stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 121 Data size: 11011 Basic 
stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 4 
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 242 Data size: 45994 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 230 Data size: 43713 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 230 Data size: 43713 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: UDFToDouble(_col0) (type: double)
                         sort order: +
                         Map-reduce partition columns: UDFToDouble(_col0) 
(type: double)
-                        Statistics: Num rows: 230 Data size: 43713 Basic 
stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 22990 Basic 
stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -703,12 +1408,12 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 115 Data size: 21417 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 121 Data size: 11011 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: UDFToDouble(_col0) (type: double)
                   sort order: +
                   Map-reduce partition columns: UDFToDouble(_col0) (type: 
double)
-                  Statistics: Num rows: 115 Data size: 21417 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 121 Data size: 11011 Basic stats: 
COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -720,14 +1425,14 @@ STAGE PLANS:
                   0 UDFToDouble(_col0) (type: double)
                   1 UDFToDouble(_col0) (type: double)
                 outputColumnNames: _col0, _col2
-                Statistics: Num rows: 253 Data size: 48084 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 191 Data size: 34762 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: string), _col2 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 253 Data size: 48084 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 191 Data size: 34762 Basic stats: 
COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 253 Data size: 48084 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 191 Data size: 34762 Basic stats: 
COMPLETE Column stats: COMPLETE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -739,3 +1444,266 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: CREATE TABLE tab_part1 (key int, value string) PARTITIONED 
BY(ds STRING) CLUSTERED BY (key, value) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_part1
+POSTHOOK: query: CREATE TABLE tab_part1 (key int, value string) PARTITIONED 
BY(ds STRING) CLUSTERED BY (key, value) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_part1
+PREHOOK: query: insert overwrite table tab_part1 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part1@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_part1 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part1@ds=2008-04-08
+POSTHOOK: Lineage: tab_part1 PARTITION(ds=2008-04-08).key SIMPLE 
[(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, 
comment:null), ]
+POSTHOOK: Lineage: tab_part1 PARTITION(ds=2008-04-08).value SIMPLE 
[(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, 
type:string, comment:null), ]
+PREHOOK: query: analyze table tab_part1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab_part1
+PREHOOK: Input: default@tab_part1@ds=2008-04-08
+PREHOOK: Output: default@tab_part1
+PREHOOK: Output: default@tab_part1@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab_part1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab_part1
+POSTHOOK: Input: default@tab_part1@ds=2008-04-08
+POSTHOOK: Output: default@tab_part1
+POSTHOOK: Output: default@tab_part1@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*)
+from
+(select distinct key,value from tab_part) a join tab b on a.key = b.key and 
a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+(select distinct key,value from tab_part) a join tab b on a.key = b.key and 
a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: 
boolean)
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: int), value (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
+                        Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: 
boolean)
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
+                        Statistics: Num rows: 242 Data size: 22990 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int), _col1 (type: string)
+                  1 _col0 (type: int), _col1 (type: string)
+                Statistics: Num rows: 195 Data size: 1560 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain
+select count(*)
+from
+(select distinct key,value from tab_part) a join tab b on a.key = b.key and 
a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+(select distinct key,value from tab_part) a join tab b on a.key = b.key and 
a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tab_part
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: 
boolean)
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: int), value (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
+                        Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and value is not null) (type: 
boolean)
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: 
string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 
(type: string)
+                        Statistics: Num rows: 242 Data size: 22990 Basic 
stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int), _col1 (type: string)
+                    1 _col0 (type: int), _col1 (type: string)
+                  input vertices:
+                    1 Map 4
+                  Statistics: Num rows: 195 Data size: 1560 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count()
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
[3/5] hive git commit: HIVE-17529: Bucket Map Join : Sets incorrect edge type causing execution failure (Deepak Jaiswal, reviewed by Jason Dere)

Reply via email to