http://git-wip-us.apache.org/repos/asf/hive/blob/8cdee629/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
index 57c792f..e2cee7f 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
@@ -108,6 +108,62 @@ POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
POSTHOOK: Output: default@tab@ds=2008-04-08
POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int,
comment:null), ]
POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string,
comment:null), ]
+PREHOOK: query: analyze table srcbucket_mapjoin compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+PREHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcbucket_mapjoin compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
+POSTHOOK: Output: default@srcbucket_mapjoin
+POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcbucket_mapjoin_part compute statistics for
columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@srcbucket_mapjoin_part
+PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcbucket_mapjoin_part compute statistics for
columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: analyze table tab compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+PREHOOK: Input: default@tab@ds=2008-04-08
+PREHOOK: Output: default@tab
+PREHOOK: Output: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+POSTHOOK: Input: default@tab@ds=2008-04-08
+POSTHOOK: Output: default@tab
+POSTHOOK: Output: default@tab@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: analyze table tab_part compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab_part
+PREHOOK: Input: default@tab_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part
+PREHOOK: Output: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab_part compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab_part
+POSTHOOK: Input: default@tab_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part
+POSTHOOK: Output: default@tab_part@ds=2008-04-08
+#### A masked pattern was here ####
PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on
a.key = c.key join tab_part b on a.value = b.value
PREHOOK: type: QUERY
POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c
on a.key = c.key join tab_part b on a.value = b.value
@@ -121,94 +177,216 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (CUSTOM_EDGE)
- Map 3 <- Map 2 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type:
boolean)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 47500 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Filter Operator
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
alias: c
- Statistics: Num rows: 500 Data size: 1904 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 475 Data size: 1808 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 475 Data size: 1808 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 475 Data size: 1808 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 2
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 809 Data size: 76855 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 809 Data size: 76855 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on
a.key = c.key join tab_part b on a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c
on a.key = c.key join tab_part b on a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 3 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 89488 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key is not null and value is not null) (type:
boolean)
- Statistics: Num rows: 450 Data size: 80539 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 450 Data size: 80539 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col1, _col2
+ outputColumnNames: _col0, _col1
input vertices:
- 0 Map 1
- Statistics: Num rows: 522 Data size: 1988 Basic stats:
COMPLETE Column stats: NONE
+ 1 Map 3
+ Statistics: Num rows: 809 Data size: 76855 Basic
stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col2 (type: string)
+ key expressions: _col1 (type: string)
sort order: +
- Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 522 Data size: 1988 Basic
stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int)
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 809 Data size: 76855 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
Execution mode: llap
LLAP IO: no inputs
Map 3
Map Operator Tree:
TableScan
+ alias: c
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
alias: b
- Statistics: Num rows: 500 Data size: 89488 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: value is not null (type: boolean)
- Statistics: Num rows: 475 Data size: 85013 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 475 Data size: 85013 Basic stats:
COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col2 (type: string)
- 1 _col1 (type: string)
- outputColumnNames: _col1, _col3
- input vertices:
- 0 Map 2
- Statistics: Num rows: 574 Data size: 2186 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: int), _col3 (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 574 Data size: 2186 Basic
stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 574 Data size: 2186 Basic
stats: COMPLETE Column stats: NONE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
Execution mode: llap
LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col3
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col3 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1309 Data size: 10472 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -238,6 +416,16 @@ POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08
POSTHOOK: Output: default@tab1
POSTHOOK: Lineage: tab1.key SIMPLE
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int,
comment:null), ]
POSTHOOK: Lineage: tab1.value SIMPLE
[(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string,
comment:null), ]
+PREHOOK: query: analyze table tab1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Output: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Output: default@tab1
+#### A masked pattern was here ####
PREHOOK: query: explain
select a.key, a.value, b.value
from tab1 a join src b on a.key = b.key
@@ -262,19 +450,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 242 Data size: 43428 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 41274 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 230 Data size: 41274 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: UDFToDouble(_col0) (type: double)
sort order: +
Map-reduce partition columns: UDFToDouble(_col0)
(type: double)
- Statistics: Num rows: 230 Data size: 41274 Basic
stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic
stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type:
string)
Execution mode: llap
LLAP IO: no inputs
@@ -308,14 +496,14 @@ STAGE PLANS:
0 UDFToDouble(_col0) (type: double)
1 UDFToDouble(_col0) (type: double)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 550 Data size: 97900 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col3
(type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 550 Data size: 97900 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 97900 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 391 Data size: 72726 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -328,10 +516,12 @@ STAGE PLANS:
ListSink
PREHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a join
(select key from tab_part where key > 2) b on a.key = b.key
+select a.key, a.value, b.value
+from tab1 a join src b on a.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a join
(select key from tab_part where key > 2) b on a.key = b.key
+select a.key, a.value, b.value
+from tab1 a join src b on a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -342,59 +532,71 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: tab_part
- Statistics: Num rows: 500 Data size: 7216 Basic stats:
COMPLETE Column stats: NONE
+ alias: a
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (key > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
- input vertices:
- 1 Map 2
- Statistics: Num rows: 1 Data size: 15 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 15 Basic stats:
COMPLETE Column stats: NONE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0)
(type: double)
+ Statistics: Num rows: 242 Data size: 22990 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type:
string)
Execution mode: llap
LLAP IO: no inputs
- Map 2
+ Map 3
Map Operator Tree:
TableScan
- alias: tab_part
- Statistics: Num rows: 500 Data size: 7216 Basic stats:
COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (key > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: UDFToDouble(_col0) (type: double)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ Map-reduce partition columns: UDFToDouble(_col0)
(type: double)
+ Statistics: Num rows: 500 Data size: 89000 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 391 Data size: 72726 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col3
(type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 391 Data size: 72726 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 391 Data size: 72726 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -403,10 +605,10 @@ STAGE PLANS:
ListSink
PREHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a left outer
join (select key from tab_part where key > 2) b on a.key = b.key
+select a.key, b.key from (select key from tab_part where key > 1) a join
(select key from tab_part where key > 2) b on a.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a left outer
join (select key from tab_part where key > 2) b on a.key = b.key
+select a.key, b.key from (select key from tab_part where key > 1) a join
(select key from tab_part where key > 2) b on a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -417,59 +619,536 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Map 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: tab_part
- Statistics: Num rows: 500 Data size: 7216 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (key > 1) (type: boolean)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Left Outer Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
- input vertices:
- 1 Map 2
- Statistics: Num rows: 1 Data size: 15 Basic stats:
COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 15 Basic stats:
COMPLETE Column stats: NONE
- table:
- input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
- output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: llap
- LLAP IO: no inputs
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a join
(select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a join
(select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (CUSTOM_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 267 Data size: 2136 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
Map 2
Map Operator Tree:
TableScan
- alias: tab_part
- Statistics: Num rows: 500 Data size: 7216 Basic stats:
COMPLETE Column stats: NONE
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a left outer
join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a left outer
join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 1) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a left outer
join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a left outer
join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (CUSTOM_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 1) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 267 Data size: 2136 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a right
outer join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a right
outer join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a right
outer join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.key from (select key from tab_part where key > 1) a right
outer join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (CUSTOM_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 2000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > 2) (type: boolean)
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 664 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 267 Data size: 2136 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 267 Data size: 2136 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select a.key, b.key from (select distinct key from
tab) a join tab b on b.key = a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key, b.key from (select distinct key from
tab) a join tab b on b.key = a.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (key > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 121 Data size: 484 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 121 Data size: 484 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 121 Data size: 484 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 191 Data size: 1528 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 191 Data size: 1528 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -477,11 +1156,9 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a right
outer join (select key from tab_part where key > 2) b on a.key = b.key
+PREHOOK: query: explain select a.key, b.key from (select distinct key from
tab) a join tab b on b.key = a.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select a.key, b.key from (select key from tab_part where key > 1) a right
outer join (select key from tab_part where key > 2) b on a.key = b.key
+POSTHOOK: query: explain select a.key, b.key from (select distinct key from
tab) a join tab b on b.key = a.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -492,59 +1169,74 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (CUSTOM_EDGE)
+ Map 3 <- Reducer 2 (CUSTOM_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: tab_part
- Statistics: Num rows: 500 Data size: 7216 Basic stats:
COMPLETE Column stats: NONE
+ alias: tab
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (key > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: int)
+ mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 121 Data size: 484 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 121 Data size: 484 Basic stats:
COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 2
+ Map 3
Map Operator Tree:
TableScan
- alias: tab_part
- Statistics: Num rows: 500 Data size: 7216 Basic stats:
COMPLETE Column stats: NONE
+ alias: b
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (key > 2) (type: boolean)
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 14 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 968 Basic stats:
COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
- Right Outer Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1
input vertices:
- 0 Map 1
- Statistics: Num rows: 1 Data size: 15 Basic stats:
COMPLETE Column stats: NONE
+ 0 Reducer 2
+ Statistics: Num rows: 191 Data size: 1528 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 15 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 191 Data size: 1528 Basic
stats: COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 121 Data size: 484 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 121 Data size: 484 Basic stats:
COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -552,9 +1244,9 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: explain select a.key, b.key from (select distinct key from
tab) a join tab b on b.key = a.key
+PREHOOK: query: explain select a.value, b.value from (select distinct value
from tab) a join tab b on b.key = a.value
PREHOOK: type: QUERY
-POSTHOOK: query: explain select a.key, b.key from (select distinct key from
tab) a join tab b on b.key = a.key
+POSTHOOK: query: explain select a.value, b.value from (select distinct value
from tab) a join tab b on b.key = a.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -565,69 +1257,82 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: tab
- Statistics: Num rows: 242 Data size: 3490 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22022 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 3316 Basic stats:
COMPLETE Column stats: NONE
+ predicate: value is not null (type: boolean)
+ Statistics: Num rows: 242 Data size: 22022 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
- keys: key (type: int)
+ keys: value (type: string)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 3316 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 121 Data size: 11011 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: _col0 (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 230 Data size: 3316 Basic stats:
COMPLETE Column stats: NONE
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 121 Data size: 11011 Basic
stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 242 Data size: 3490 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 3316 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 3316 Basic stats:
COMPLETE Column stats: NONE
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: int)
+ key expressions: UDFToDouble(_col0) (type: double)
sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 230 Data size: 3316 Basic stats:
COMPLETE Column stats: NONE
+ Map-reduce partition columns: UDFToDouble(_col0)
(type: double)
+ Statistics: Num rows: 242 Data size: 22990 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- keys: KEY._col0 (type: int)
+ keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 115 Data size: 1658 Basic stats:
COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
+ Statistics: Num rows: 121 Data size: 11011 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type:
double)
+ Statistics: Num rows: 121 Data size: 11011 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 191 Data size: 34762 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1
- input vertices:
- 1 Map 3
- Statistics: Num rows: 253 Data size: 3647 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 191 Data size: 34762 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 253 Data size: 3647 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 191 Data size: 34762 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -660,39 +1365,39 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tab
- Statistics: Num rows: 242 Data size: 45070 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22022 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: value is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 42835 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22022 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
keys: value (type: string)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 230 Data size: 42835 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 121 Data size: 11011 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 230 Data size: 42835 Basic
stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 121 Data size: 11011 Basic
stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 4
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 242 Data size: 45994 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 230 Data size: 43713 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 230 Data size: 43713 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: UDFToDouble(_col0) (type: double)
sort order: +
Map-reduce partition columns: UDFToDouble(_col0)
(type: double)
- Statistics: Num rows: 230 Data size: 43713 Basic
stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 242 Data size: 22990 Basic
stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -703,12 +1408,12 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 115 Data size: 21417 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 121 Data size: 11011 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: UDFToDouble(_col0) (type: double)
sort order: +
Map-reduce partition columns: UDFToDouble(_col0) (type:
double)
- Statistics: Num rows: 115 Data size: 21417 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 121 Data size: 11011 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string)
Reducer 3
Execution mode: llap
@@ -720,14 +1425,14 @@ STAGE PLANS:
0 UDFToDouble(_col0) (type: double)
1 UDFToDouble(_col0) (type: double)
outputColumnNames: _col0, _col2
- Statistics: Num rows: 253 Data size: 48084 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 191 Data size: 34762 Basic stats:
COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 253 Data size: 48084 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 191 Data size: 34762 Basic stats:
COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 253 Data size: 48084 Basic stats:
COMPLETE Column stats: NONE
+ Statistics: Num rows: 191 Data size: 34762 Basic stats:
COMPLETE Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -739,3 +1444,266 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: CREATE TABLE tab_part1 (key int, value string) PARTITIONED
BY(ds STRING) CLUSTERED BY (key, value) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab_part1
+POSTHOOK: query: CREATE TABLE tab_part1 (key int, value string) PARTITIONED
BY(ds STRING) CLUSTERED BY (key, value) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab_part1
+PREHOOK: query: insert overwrite table tab_part1 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Output: default@tab_part1@ds=2008-04-08
+POSTHOOK: query: insert overwrite table tab_part1 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Output: default@tab_part1@ds=2008-04-08
+POSTHOOK: Lineage: tab_part1 PARTITION(ds=2008-04-08).key SIMPLE
[(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int,
comment:null), ]
+POSTHOOK: Lineage: tab_part1 PARTITION(ds=2008-04-08).value SIMPLE
[(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value,
type:string, comment:null), ]
+PREHOOK: query: analyze table tab_part1 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab_part1
+PREHOOK: Input: default@tab_part1@ds=2008-04-08
+PREHOOK: Output: default@tab_part1
+PREHOOK: Output: default@tab_part1@ds=2008-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table tab_part1 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab_part1
+POSTHOOK: Input: default@tab_part1@ds=2008-04-08
+POSTHOOK: Output: default@tab_part1
+POSTHOOK: Output: default@tab_part1@ds=2008-04-08
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*)
+from
+(select distinct key,value from tab_part) a join tab b on a.key = b.key and
a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+(select distinct key,value from tab_part) a join tab b on a.key = b.key and
a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type:
boolean)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: int), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type:
string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type:
boolean)
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type:
string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 242 Data size: 22990 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col1 (type: string)
+ 1 _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 195 Data size: 1560 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select count(*)
+from
+(select distinct key,value from tab_part) a join tab b on a.key = b.key and
a.value = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*)
+from
+(select distinct key,value from tab_part) a join tab b on a.key = b.key and
a.value = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab_part
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type:
boolean)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: int), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type:
string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key is not null and value is not null) (type:
boolean)
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 242 Data size: 22990 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type:
string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: string)
+ Statistics: Num rows: 242 Data size: 22990 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int), _col1 (type: string)
+ 1 _col0 (type: int), _col1 (type: string)
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 195 Data size: 1560 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+