Repository: hive Updated Branches: refs/heads/master b30fe72e0 -> 6a776f599
HIVE-13475: Allow aggregate functions in over clause (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a776f59 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a776f59 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a776f59 Branch: refs/heads/master Commit: 6a776f5998b1fc41c602b135c9e1ef04171f4b74 Parents: b30fe72 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Fri Apr 15 12:59:39 2016 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Fri Apr 15 13:24:50 2016 +0100 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 5 + .../queries/clientpositive/windowing_gby2.q | 41 ++ .../results/clientpositive/windowing_gby2.q.out | 652 +++++++++++++++++++ 3 files changed, 698 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d3e7040..329c617 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -566,7 +566,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { assert (expressionTree.getChildCount() != 0); if (expressionTree.getChild(expressionTree.getChildCount()-1).getType() == HiveParser.TOK_WINDOWSPEC) { + // If it is a windowing spec, we include it in the list + // Further, we will examine its children AST nodes to check whether + // there are aggregation functions within wdwFns.add(expressionTree); + doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1), + aggregations, wdwFns); return; } if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/test/queries/clientpositive/windowing_gby2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/windowing_gby2.q b/ql/src/test/queries/clientpositive/windowing_gby2.q new file mode 100644 index 0000000..920f723 --- /dev/null +++ b/ql/src/test/queries/clientpositive/windowing_gby2.q @@ -0,0 +1,41 @@ +set hive.mapred.mode=nonstrict; + +explain +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key; + +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key; + +explain +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int); + +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int); + +explain +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value; + +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value; + +explain +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean; + +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean; http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/test/results/clientpositive/windowing_gby2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/windowing_gby2.q.out b/ql/src/test/results/clientpositive/windowing_gby2.q.out new file mode 100644 index 0000000..4bd6994 --- /dev/null +++ b/ql/src/test/results/clientpositive/windowing_gby2.q.out @@ -0,0 +1,652 @@ +PREHOOK: query: explain +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ws + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), c_int (type: int) + outputColumnNames: key, c_int + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(c_int) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rank_window_0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +1 +2 +2 +2 +5 +5 +7 +PREHOOK: query: explain +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ws + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), c_int (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1), sum(_col2) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col1 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col0 + name: avg + window function: GenericUDAFAverageEvaluatorDouble + window frame: PRECEDING(MAX)~ + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: avg_window_0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +NULL +1.0 +2.0 +3.0 +PREHOOK: query: explain +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value +PREHOOK: type: QUERY +POSTHOOK: query: explain +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: cbo_t3 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToFloat(c_int) - c_float) (type: float), (UDFToDouble(c_float) / UDFToDouble(c_int)) (type: double), c_int (type: int), ((UDFToDouble(c_float) / UDFToDouble(c_int)) - UDFToDouble(c_int)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col3), max(_col4), sum(_col5) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), sum(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: double) + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: double, _col3: double, _col4: int, _col5: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 DESC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col2 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double), rank_window_0 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, rank_window_0 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: lower(_col1) (type: string), _col3 (type: double) + sort order: ++ + Map-reduce partition columns: lower(_col1) (type: string) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: double) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: double) + outputColumnNames: _col0, _col2, _col4, _col5, _col6 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: string, _col4: double, _col5: int, _col6: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS FIRST + partition by: lower(_col2) + raw input shape: + window functions: + window function definition + alias: dense_rank_window_1 + arguments: _col4 + name: dense_rank + window function: GenericUDAFDenseRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col5 (type: int), _col6 (type: double), dense_rank_window_1 (type: int) + outputColumnNames: _col0, _col5, _col6, dense_rank_window_1 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col5 (type: int), _col6 (type: double) + sort order: ++ + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + value expressions: dense_rank_window_1 (type: int), _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double) + outputColumnNames: _col0, _col1, _col6, _col7 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col6: int, _col7: double + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col7 ASC NULLS FIRST + partition by: _col6 + raw input shape: + window functions: + window function definition + alias: percent_rank_window_2 + arguments: _col7 + name: percent_rank + window function: GenericUDAFPercentRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: int), percent_rank_window_2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +1 1 0.0 +1 1 0.0 +1 1 0.0 +1 1 0.0 +1 1 0.0 +1 1 0.0 +1 1 0.0 +PREHOOK: query: explain +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean +PREHOOK: type: QUERY +POSTHOOK: query: explain +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: ws + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), c_int (type: int), c_boolean (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: boolean) + TableScan + alias: wr + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cstring1 is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), cstring1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: boolean), _col3 (type: int), _col1 (type: int) + outputColumnNames: _col2, _col3, _col1 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3), sum(_col1) + keys: _col2 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: bigint, _col2: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: (UDFToDouble(_col1) / UDFToDouble(_col2)) + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rank_window_0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@cbo_t3 +#### A masked pattern was here #### +POSTHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@cbo_t3 +#### A masked pattern was here ####
