http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
----------------------------------------------------------------------
diff --git
a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
index b075ecf..801948c 100644
--- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
+++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
@@ -983,7 +983,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -1153,7 +1153,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -1229,6 +1229,910 @@ POSTHOOK: Input: default@src1
#### A masked pattern was here ####
12744278 500 652447 25
PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 262 Data size: 2457 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type:
int), hash(_col2) (type: int), hash(_col3) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 262 Data size: 2457 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2),
sum(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1),
sum(VALUE._col2), sum(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)),
SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)),
SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278 500 652447 25
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic
stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 262 Data size: 2457 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type:
int), hash(_col2) (type: int), hash(_col3) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 262 Data size: 2457 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2),
sum(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1),
sum(VALUE._col2), sum(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)),
SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)),
SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278 500 652447 25
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 25 Data size: 2150 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 25 Data size: 2150 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 262 Data size: 1131 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint)
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278 310
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 25 Data size: 2150 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 25 Data size: 2150 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 262 Data size: 1131 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint)
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278 310
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: y
+ Statistics: Num rows: 25 Data size: 2150 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 25 Data size: 2150 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 262 Data size: 1131 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint)
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278 310
+PREHOOK: query: EXPLAIN
SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
FROM (SELECT a.key AS key, count(1) AS cnt
FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
@@ -1320,7 +2224,7 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
@@ -1352,42 +2256,207 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(_col0), sum(_col1)
+ aggregations: sum(_col0), sum(_col1)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint)
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+ FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
+ FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
+ ON (a.key = b.key)
+ GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278 310
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON
(x.key = y.key)) a
+ JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by
z.key) b
+ ON (a.key = b.key)) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON
(x.key = y.key)) a
+ JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by
z.key) b
+ ON (a.key = b.key)) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 4
(ONE_TO_ONE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: x
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: y
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic
stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ 2 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: hash(_col0) (type: int), hash(_col1) (type:
int), hash(_col2) (type: int), hash(_col3) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 29 Data size: 7801 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), sum(_col1), sum(_col2),
sum(_col3)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type:
bigint)
- Reducer 5
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1),
sum(VALUE._col2), sum(VALUE._col3)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
+ aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE
Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE
Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -1395,42 +2464,38 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
- FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
- FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
- ON (a.key = b.key)
- GROUP BY a.key) tmp
+PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)),
SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON
(x.key = y.key)) a
+ JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by
z.key) b
+ ON (a.key = b.key)) tmp
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
- FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
- FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
- ON (a.key = b.key)
- GROUP BY a.key) tmp
+POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)),
SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON
(x.key = y.key)) a
+ JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by
z.key) b
+ ON (a.key = b.key)) tmp
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
#### A masked pattern was here ####
-12744278 310
+1711763 3531902962 1711763 37
PREHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
- FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
- FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
- ON (a.key = b.key)
- GROUP BY a.key) tmp
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON
(x.key = y.key)) a
+ JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by
z.key) b
+ ON (a.key = b.key)) tmp
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
- FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
- FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
- ON (a.key = b.key)
- GROUP BY a.key) tmp
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON
(x.key = y.key)) a
+ JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by
z.key) b
+ ON (a.key = b.key)) tmp
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -1441,141 +2506,136 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
- Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
- Reducer 7 <- Map 6 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: x
- Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: key
- Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
Group By Operator
+ aggregations: count(value)
keys: key (type: string)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 21750 Basic
stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
Map 6
Map Operator Tree:
TableScan
alias: y
- Statistics: Num rows: 25 Data size: 2150 Basic stats:
COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: key (type: string)
- outputColumnNames: key
- Statistics: Num rows: 25 Data size: 2150 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: key (type: string)
- mode: hash
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 43500 Basic
stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: no inputs
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE
Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 21750 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 18 Data size: 4842 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type:
string), _col3 (type: bigint)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
- Outer Join 0 to 1
+ Inner Join 0 to 1
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 262 Data size: 1131 Basic stats:
COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
- Reducer 4
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 278 Basic stats: COMPLETE
Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
- expressions: hash(_col0) (type: int), hash(_col1) (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 278 Basic stats:
COMPLETE Column stats: COMPLETE
+ expressions: hash(_col0) (type: int), hash(_col1) (type:
int), hash(_col2) (type: int), hash(_col3) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28 Data size: 7532 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(_col0), sum(_col1)
+ aggregations: sum(_col0), sum(_col1), sum(_col2),
sum(_col3)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats:
COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type:
bigint)
- Reducer 5
+ Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint)
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1),
sum(VALUE._col2), sum(VALUE._col3)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE
Column stats: COMPLETE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
+ aggregations: count(VALUE._col0)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE
Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE
Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12 Data size: 1032 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12 Data size: 1128 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
Stage: Stage-0
Fetch Operator
@@ -1583,27 +2643,25 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
- FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
- FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
- ON (a.key = b.key)
- GROUP BY a.key) tmp
+PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)),
SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON
(x.key = y.key)) a
+ JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by
z.key) b
+ ON (a.key = b.key)) tmp
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
- FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by
x.key) a
- FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y
group by y.key) b
- ON (a.key = b.key)
- GROUP BY a.key) tmp
+POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)),
SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
+ FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON
(x.key = y.key)) a
+ JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by
z.key) b
+ ON (a.key = b.key)) tmp
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
#### A masked pattern was here ####
-12744278 310
+1711763 3531902962 1711763 37
PREHOOK: query: EXPLAIN
SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2
@@ -1790,9 +2848,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 4
(ONE_TO_ONE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Map 1 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1828,7 +2887,7 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: y
@@ -1854,17 +2913,32 @@ STAGE PLANS:
Merge Join Operator
condition map:
Inner Join 0 to 1
- Inner Join 0 to 2
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- 2 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE
Column stats: COMPLETE
+ Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 18 Data size: 4842 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col2 (type:
string), _col3 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
expressions: hash(_col0) (type: int), hash(_col1) (type:
int), hash(_col2) (type: int), hash(_col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 29 Data size: 7801 Basic stats:
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 28 Data size: 7532 Basic stats:
COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col0), sum(_col1), sum(_col2),
sum(_col3)
mode: hash
@@ -1874,7 +2948,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 32 Basic stats:
COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type:
bigint), _col2 (type: bigint), _col3 (type: bigint)
- Reducer 3
+ Reducer 4
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -1889,7 +2963,7 @@ STAGE PLANS:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 4
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator