hive git commit: HIVE-13475: Allow aggregate functions in over clause (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

jcamacho Fri, 15 Apr 2016 05:25:34 -0700

Repository: hive
Updated Branches:
  refs/heads/master b30fe72e0 -> 6a776f599



HIVE-13475: Allow aggregate functions in over clause (Jesus Camacho Rodriguez, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a776f59
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a776f59
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a776f59

Branch: refs/heads/master
Commit: 6a776f5998b1fc41c602b135c9e1ef04171f4b74
Parents: b30fe72
Author: Jesus Camacho Rodriguez <[email protected]>
Authored: Fri Apr 15 12:59:39 2016 +0100
Committer: Jesus Camacho Rodriguez <[email protected]>
Committed: Fri Apr 15 13:24:50 2016 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   5 +
 .../queries/clientpositive/windowing_gby2.q     |  41 ++
 .../results/clientpositive/windowing_gby2.q.out | 652 +++++++++++++++++++
 3 files changed, 698 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d3e7040..329c617 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -566,7 +566,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       assert (expressionTree.getChildCount() != 0);
       if (expressionTree.getChild(expressionTree.getChildCount()-1).getType()
           == HiveParser.TOK_WINDOWSPEC) {
+        // If it is a windowing spec, we include it in the list
+        // Further, we will examine its children AST nodes to check whether
+        // there are aggregation functions within
         wdwFns.add(expressionTree);
+        doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(expressionTree.getChildCount()-1),
+                aggregations, wdwFns);
         return;
       }
       if (expressionTree.getChild(0).getType() == HiveParser.Identifier) {

http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/test/queries/clientpositive/windowing_gby2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing_gby2.q b/ql/src/test/queries/clientpositive/windowing_gby2.q
new file mode 100644
index 0000000..920f723
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/windowing_gby2.q
@@ -0,0 +1,41 @@
+set hive.mapred.mode=nonstrict;
+
+explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key;
+
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key;
+
+explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int);
+
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int);
+
+explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value;
+
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value;
+
+explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean;
+
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean;

http://git-wip-us.apache.org/repos/asf/hive/blob/6a776f59/ql/src/test/results/clientpositive/windowing_gby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/windowing_gby2.q.out b/ql/src/test/results/clientpositive/windowing_gby2.q.out
new file mode 100644
index 0000000..4bd6994
--- /dev/null
+++ b/ql/src/test/results/clientpositive/windowing_gby2.q.out
@@ -0,0 +1,652 @@
+PREHOOK: query: explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string), c_int (type: int)
+              outputColumnNames: key, c_int
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: sum(c_int)
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column 
stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint)
+            outputColumnNames: _col1
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: 0 (type: int), _col1 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: bigint)
+          outputColumnNames: _col1
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column 
stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col1 ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col1
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: rank_window_0 (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by ws.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+1
+2
+2
+2
+5
+5
+7
+PREHOOK: query: explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: UDFToInteger(key) (type: int), value (type: 
string), c_int (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: min(_col1), sum(_col2)
+                keys: _col0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0), sum(VALUE._col1)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string), _col2 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: 
string), KEY.reducesinkkey1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column 
stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: string, _col2: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: avg_window_0
+                        arguments: _col0
+                        name: avg
+                        window function: GenericUDAFAverageEvaluatorDouble
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: avg_window_0 (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank
+from cbo_t3 ws
+group by cast(ws.key as int)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+NULL
+1.0
+2.0
+3.0
+PREHOOK: query: explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: cbo_t3
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string), 
(UDFToFloat(c_int) - c_float) (type: float), (UDFToDouble(c_float) / 
UDFToDouble(c_int)) (type: double), c_int (type: int), ((UDFToDouble(c_float) / 
UDFToDouble(c_int)) - UDFToDouble(c_int)) (type: double)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col2), sum(_col3), max(_col4), sum(_col5)
+                keys: _col0 (type: string), _col1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: double), _col3 (type: 
double), _col4 (type: int), _col5 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), 
sum(VALUE._col3)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col2 (type: double)
+              sort order: +-
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col1 (type: string), _col3 (type: double), 
_col4 (type: int), _col5 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
string), KEY.reducesinkkey1 (type: double), VALUE._col1 (type: double), 
VALUE._col2 (type: int), VALUE._col3 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column 
stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: string, _col1: string, _col2: double, 
_col3: double, _col4: int, _col5: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 DESC NULLS LAST
+                  partition by: _col0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col2
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: string), _col3 (type: double), _col4 
(type: int), _col5 (type: double), rank_window_0 (type: int)
+              outputColumnNames: _col1, _col3, _col4, _col5, rank_window_0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: lower(_col1) (type: string), _col3 (type: 
double)
+              sort order: ++
+              Map-reduce partition columns: lower(_col1) (type: string)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: rank_window_0 (type: int), _col1 (type: 
string), _col4 (type: int), _col5 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col2 (type: string), 
KEY.reducesinkkey1 (type: double), VALUE._col4 (type: int), VALUE._col5 (type: 
double)
+          outputColumnNames: _col0, _col2, _col4, _col5, _col6
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column 
stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col2: string, _col4: double, 
_col5: int, _col6: double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col4 ASC NULLS FIRST
+                  partition by: lower(_col2)
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: dense_rank_window_1
+                        arguments: _col4
+                        name: dense_rank
+                        window function: GenericUDAFDenseRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: int), _col5 (type: int), _col6 (type: 
double), dense_rank_window_1 (type: int)
+              outputColumnNames: _col0, _col5, _col6, dense_rank_window_1
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col5 (type: int), _col6 (type: double)
+              sort order: ++
+              Map-reduce partition columns: _col5 (type: int)
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: dense_rank_window_1 (type: int), _col0 (type: 
int)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 
KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double)
+          outputColumnNames: _col0, _col1, _col6, _col7
+          Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column 
stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: int, _col1: int, _col6: int, _col7: 
double
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col7 ASC NULLS FIRST
+                  partition by: _col6
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: percent_rank_window_2
+                        arguments: _col7
+                        name: percent_rank
+                        window function: GenericUDAFPercentRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: int), _col0 (type: int), 
percent_rank_window_2 (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE 
Column stats: NONE
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank () over(partition by key order by sum(c_int - c_float) desc) ,
+dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc),
+percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc)
+from cbo_t3
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+1      1       0.0
+1      1       0.0
+1      1       0.0
+1      1       0.0
+1      1       0.0
+1      1       0.0
+1      1       0.0
+PREHOOK: query: explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: ws
+            Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: value is not null (type: boolean)
+              Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: value (type: string), c_int (type: int), 
c_boolean (type: boolean)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 20 Data size: 262 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int), _col2 (type: boolean)
+          TableScan
+            alias: wr
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: cstring1 is not null (type: boolean)
+              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), cstring1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: string)
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col1, _col2, _col3
+          Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: boolean), _col3 (type: int), _col1 
(type: int)
+            outputColumnNames: _col2, _col3, _col1
+            Statistics: Num rows: 13516 Data size: 2906160 Basic stats: 
COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: sum(_col3), sum(_col1)
+              keys: _col2 (type: boolean)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 13516 Data size: 2906160 Basic stats: 
COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: boolean)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: boolean)
+              Statistics: Num rows: 13516 Data size: 2906160 Basic stats: 
COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), sum(VALUE._col1)
+          keys: KEY._col0 (type: boolean)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint), _col2 (type: bigint)
+            outputColumnNames: _col1, _col2
+            Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: 0 (type: int), (UDFToDouble(_col1) / UDFToDouble(_col2)) (type: double)
+              sort order: ++
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: bigint, _col2: bigint
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: (UDFToDouble(_col1) / UDFToDouble(_col2)) ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: (UDFToDouble(_col1) / UDFToDouble(_col2))
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: rank_window_0 (type: int)
+              outputColumnNames: _col0
+              Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####
+POSTHOOK: query: select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank
+from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1
+group by ws.c_boolean
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@cbo_t3
+#### A masked pattern was here ####

hive git commit: HIVE-13475: Allow aggregate functions in over clause (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Reply via email to