Repository: hive Updated Branches: refs/heads/master d97e4e2c9 -> 076b6cccc
http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/test/results/clientpositive/tez/count.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/count.q.out b/ql/src/test/results/clientpositive/tez/count.q.out index 9fc2c75..9dc2764 100644 --- a/ql/src/test/results/clientpositive/tez/count.q.out +++ b/ql/src/test/results/clientpositive/tez/count.q.out @@ -292,6 +292,74 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### 7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: --first aggregation with literal. gbinfo was generating wrong expression +explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +POSTHOOK: query: --first aggregation with literal. 
gbinfo was generating wrong expression +explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: $f1, $f2, $f3, $f4 + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1), count(), count($f1), count($f2), count($f3), count($f4), count(DISTINCT $f1), count(DISTINCT $f2), count(DISTINCT $f3), count(DISTINCT $f4), count(DISTINCT $f1, $f2), count(DISTINCT $f2, $f3), count(DISTINCT $f3, $f4), count(DISTINCT $f1, $f4), count(DISTINCT $f1, $f3), count(DISTINCT $f2, $f4), count(DISTINCT $f1, $f2, $f3), count(DISTINCT $f2, $f3, $f4), count(DISTINCT $f1, $f3, $f4), count(DISTINCT $f1, $f2, $f4), count(DISTINCT $f1, $f2, $f3, $f4) + keys: $f1 (type: int), $f2 (type: int), $f3 (type: int), $f4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 
(type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: ++++ + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, 
KEY._col0:14._col3) + mode: mergepartial + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13, $f14, $f15, $f16, $f17, $f18, $f19, $f20 + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 PREHOOK: query: explain select count(distinct b) from abcd group by a PREHOOK: type: QUERY POSTHOOK: query: explain select count(distinct b) from abcd group by a @@ -498,10 +566,11 @@ STAGE PLANS: sort order: + Map-reduce partition columns: c (type: int) Statistics: Num rows: 4 Data 
size: 34 Basic stats: COMPLETE Column stats: NONE + value expressions: b (type: int) Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: count(KEY._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: complete outputColumnNames: c, $f1 @@ -559,31 +628,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: abcd - Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c (type: int), d (type: int) - outputColumnNames: c, d - Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + expressions: b (type: int), c (type: int), d (type: int) + outputColumnNames: b, c, d + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: d (type: int), c (type: int) sort order: ++ Map-reduce partition columns: d (type: int) - Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: b (type: int) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) + aggregations: count(VALUE._col0), count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: int) mode: complete outputColumnNames: d, $f1, $f2 - Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: $f1 (type: bigint), $f2 (type: bigint) outputColumnNames: _o__c0, _o__c1 - Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -603,10 +673,205 @@ POSTHOOK: query: select count(b), count(distinct c) from abcd group by d POSTHOOK: type: QUERY POSTHOOK: Input: default@abcd #### A masked pattern was here #### -0 0 -1 1 +0 1 +1 0 1 1 1 1 1 1 1 1 1 1 +PREHOOK: query: --non distinct aggregate with same column as group by key +explain select a, count(distinct b), count(distinct c), sum(d), sum(d+d), sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: --non distinct aggregate with same column as group by key +explain select a, count(distinct b), count(distinct c), sum(d), sum(d+d), sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int), (d + d) (type: int), (d * 3) (type: int) + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5 + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: $f0 (type: int), $f1 (type: int), $f2 (type: int) + sort order: +++ + Map-reduce partition columns: $f0 (type: int) + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: $f3 (type: int), $f4 (type: int), $f5 (type: int) + Reducer 2 + Reduce Operator Tree: + Group By Operator + 
aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(KEY._col1:0._col0), sum(KEY._col1:1._col0), sum(KEY._col0), sum(DISTINCT KEY._col1:2._col0), sum(DISTINCT KEY._col1:3._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d), sum(d+d), sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d), sum(d+d), sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 2 2 10 20 30 1200 95 30 10 1100 +100 1 1 3 6 9 100 10 100 100 100 +12 1 2 9 18 27 100 155 24 12 100 +NULL 1 1 6 12 18 35 23 NULL NULL 35 +PREHOOK: query: --non distinct aggregate with same column as distinct aggregate +explain select a, count(distinct b), count(distinct c), sum(d), sum(c) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: --non distinct aggregate with same column as distinct aggregate +explain select a, count(distinct b), count(distinct c), sum(d), sum(c) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 
depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int), c (type: int) + sort order: +++ + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: int) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0), sum(KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: a, $f1, $f2, $f3, $f4 + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d), sum(c) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d), sum(c) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 2 2 10 95 +100 1 1 3 10 +12 1 2 9 155 
+NULL 1 1 6 23 +PREHOOK: query: --aggregation with literal +explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +POSTHOOK: query: --aggregation with literal +explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: $f1, $f2, $f3, $f4 + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: $f1 (type: int), $f2 (type: int), $f3 (type: int), $f4 (type: int) + sort order: ++++ + Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), 
count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + mode: complete + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13, $f14, $f15, $f16, $f17, $f18, $f19, $f20 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: 
select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4
