HIVE-14959: Fix DISTINCT with windowing when CBO is enabled/disabled (Jesus 
Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/36e810fa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/36e810fa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/36e810fa

Branch: refs/heads/hive-14535
Commit: 36e810fa6fd1b353ee1d9907927aa472ae53dd48
Parents: 2cae736
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Thu Oct 13 22:04:37 2016 +0100
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Mon Oct 17 20:20:54 2016 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |   6 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  37 +-
 .../clientpositive/distinct_windowing_no_cbo.q  |  63 ++
 ql/src/test/queries/clientpositive/windowing.q  |   6 +
 .../distinct_windowing_no_cbo.q.out             | 796 +++++++++++++++++++
 .../llap/cbo_rp_windowing_2.q.out               |   5 +-
 .../results/clientpositive/llap/windowing.q.out | 110 ++-
 .../clientpositive/spark/windowing.q.out        | 105 ++-
 8 files changed, 1103 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e6ab947..cf66ad9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2366,6 +2366,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
         }
       }
 
+      // Select DISTINCT + windowing; GBy handled by genSelectForWindowing
+      if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI &&
+              !qb.getAllWindowingSpecs().isEmpty()) {
+        return null;
+      }
+
       List<ASTNode> grpByAstExprs = getGroupByForClause(qbp, detsClauseName);
       HashMap<String, ASTNode> aggregationTrees = 
qbp.getAggregationExprsForClause(detsClauseName);
       boolean hasGrpByAstExprs = (grpByAstExprs != null && 
!grpByAstExprs.isEmpty()) ? true : false;

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 747f387..9d58193 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3814,24 +3814,12 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
       List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
           : selectExprs.getChildCount());
       if (selectExprs != null) {
-        HashMap<String, ASTNode> windowingExprs = 
parseInfo.getWindowingExprsForClause(dest);
-
         for (int i = 0; i < selectExprs.getChildCount(); ++i) {
           if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == 
HiveParser.TOK_HINTLIST) {
             continue;
           }
           // table.column AS alias
           ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
-          /*
-           * If this is handled by Windowing then ignore it.
-           */
-          if (windowingExprs != null && 
windowingExprs.containsKey(grpbyExpr.toStringTree())) {
-            if (!isCBOExecuted()) {
-              throw new SemanticException("SELECT DISTINCT not allowed in the 
presence of windowing"
-                      + " functions when CBO is off");
-            }
-            continue;
-          }
           result.add(grpbyExpr);
         }
       }
@@ -9316,8 +9304,10 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
             // Preserve operator before the GBY - we'll use it to resolve '*'
             Operator<?> gbySource = curr;
 
-            if (qbp.getAggregationExprsForClause(dest).size() != 0
-                || getGroupByForClause(qbp, dest).size() > 0) {
+            if ((qbp.getAggregationExprsForClause(dest).size() != 0
+                || getGroupByForClause(qbp, dest).size() > 0)
+                    && (qbp.getSelForClause(dest).getToken().getType() != 
HiveParser.TOK_SELECTDI
+                      || qbp.getWindowingExprsForClause(dest) == null)) {
               // multiple distincts is not supported with skew in data
               if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
                   qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
@@ -9401,12 +9391,29 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
       curr = genHavingPlan(dest, qb, curr, aliasToOpInfo);
     }
 
-
     if(queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null) {
       curr = genWindowingPlan(qb.getWindowingSpec(dest), curr);
+      // GBy for DISTINCT after windowing
+      if ((qbp.getAggregationExprsForClause(dest).size() != 0
+              || getGroupByForClause(qbp, dest).size() > 0)
+                  && qbp.getSelForClause(dest).getToken().getType() == 
HiveParser.TOK_SELECTDI
+                  && qbp.getWindowingExprsForClause(dest) != null) {
+        if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
+          if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+            curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr);
+          } else {
+            curr = genGroupByPlanMapAggr2MR(dest, qb, curr);
+          }
+        } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+          curr = genGroupByPlan2MR(dest, qb, curr);
+        } else {
+          curr = genGroupByPlan1MR(dest, qb, curr);
+        }
+      }
     }
 
     curr = genSelectPlan(dest, qb, curr, gbySource);
+
     Integer limit = qbp.getDestLimit(dest);
     Integer offset = (qbp.getDestLimitOffset(dest) == null) ? 0 : 
qbp.getDestLimitOffset(dest);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q 
b/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
new file mode 100644
index 0000000..029974d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_windowing_no_cbo.q
@@ -0,0 +1,63 @@
+set hive.cbo.enable=false;
+
+drop table over10k;
+
+create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|';
+
+load data local inpath '../../data/files/over10k' into table over10k;
+
+explain
+select distinct first_value(t) over ( partition by si order by i ) from 
over10k limit 10;
+
+select distinct first_value(t) over ( partition by si order by i ) from 
over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10;
+
+explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50;
+
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50;
+
+explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50;
+
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50;
+
+explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50;
+
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50;

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/queries/clientpositive/windowing.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/windowing.q 
b/ql/src/test/queries/clientpositive/windowing.q
index 4a8383a..e60a6ef 100644
--- a/ql/src/test/queries/clientpositive/windowing.q
+++ b/ql/src/test/queries/clientpositive/windowing.q
@@ -362,6 +362,12 @@ window w1 as (distribute by p_mfgr sort by p_name range 
between 2 preceding and
        w3 as (distribute by p_mfgr sort by p_name range between unbounded 
preceding and current row); 
 
 -- 35. testDistinctWithWindowing
+explain
+select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following);
+
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out 
b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
new file mode 100644
index 0000000..0e9091e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out
@@ -0,0 +1,796 @@
+PREHOOK: query: drop table over10k
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table over10k
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@over10k
+POSTHOOK: query: create table over10k(
+           t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           dec decimal(4,2),
+           bin binary)
+       row format delimited
+       fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@over10k
+PREHOOK: query: load data local inpath '../../data/files/over10k' into table 
over10k
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@over10k
+POSTHOOK: query: load data local inpath '../../data/files/over10k' into table 
over10k
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@over10k
+PREHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from 
over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct first_value(t) over ( partition by si order by i ) from 
over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              value expressions: t (type: tinyint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: 
smallint), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE 
Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: tinyint, _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: first_value_window_0
+                        arguments: _col0
+                        name: first_value
+                        window function: GenericUDAFFirstValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: first_value_window_0 (type: tinyint)
+              outputColumnNames: first_value_window_0
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                keys: first_value_window_0 (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: tinyint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct first_value(t) over ( partition by si order by 
i ) from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct first_value(t) over ( partition by si order 
by i ) from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+-2
+-1
+0
+1
+2
+3
+4
+6
+7
+8
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 
(type: int)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: last_value_window_0
+                        arguments: _col2
+                        name: last_value
+                        window function: GenericUDAFLastValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: last_value_window_0 (type: int)
+              outputColumnNames: last_value_window_0
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                keys: last_value_window_0 (type: int)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: int)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by 
i )
+from over10k limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by 
i )
+from over10k limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536
+65537
+65538
+65539
+65540
+65541
+65542
+65543
+65544
+65545
+PREHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct last_value(i) over ( partition by si order by i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              value expressions: t (type: tinyint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: tinyint), KEY.reducesinkkey0 (type: 
smallint), KEY.reducesinkkey1 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE 
Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: tinyint, _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: last_value_window_0
+                        arguments: _col2
+                        name: last_value
+                        window function: GenericUDAFLastValueEvaluator
+                        window frame: PRECEDING(MAX)~
+                      window function definition
+                        alias: first_value_window_1
+                        arguments: _col0
+                        name: first_value
+                        window function: GenericUDAFFirstValueEvaluator
+                        window frame: PRECEDING(MAX)~
+            Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: last_value_window_0 (type: int), 
first_value_window_1 (type: tinyint)
+              outputColumnNames: last_value_window_0, first_value_window_1
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                keys: last_value_window_0 (type: int), first_value_window_1 
(type: tinyint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: int), _col1 (type: tinyint)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: int), _col1 (type: 
tinyint)
+              Statistics: Num rows: 84795 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int), KEY._col1 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 42397 Data size: 508765 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 50
+            Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 50
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct last_value(i) over ( partition by si order by 
i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct last_value(i) over ( partition by si order by 
i ),
+                first_value(t)  over ( partition by si order by i )
+from over10k limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+65536  -2
+65536  2
+65536  9
+65536  12
+65536  13
+65536  18
+65536  22
+65536  23
+65536  27
+65536  37
+65536  39
+65536  42
+65536  48
+65536  55
+65536  56
+65536  58
+65536  61
+65536  69
+65536  71
+65536  73
+65536  75
+65536  78
+65536  80
+65536  83
+65536  84
+65536  88
+65536  94
+65536  104
+65536  107
+65536  108
+65536  111
+65536  114
+65536  118
+65536  119
+65536  121
+65537  4
+65537  8
+65537  9
+65537  11
+65537  18
+65537  22
+65537  25
+65537  36
+65537  51
+65537  53
+65537  54
+65537  55
+65537  56
+65537  57
+65537  59
+PREHOOK: query: explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select si, max(f) mf, rank() over ( partition by si order by mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: si (type: smallint), f (type: float)
+              outputColumnNames: si, f
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(f)
+                keys: si (type: smallint)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: smallint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: smallint)
+                  Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: float)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0)
+          keys: KEY._col0 (type: smallint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE 
Column stats: NONE
+          Filter Operator
+            predicate: (_col1 > 0.0) (type: boolean)
+            Statistics: Num rows: 21198 Data size: 169584 Basic stats: 
COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: smallint), _col1 (type: float)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: smallint)
+              Statistics: Num rows: 21198 Data size: 169584 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 
(type: float)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE 
Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: smallint, _col1: float
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col1 ASC NULLS FIRST
+                  partition by: _col0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col1
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 21198 Data size: 169584 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: smallint), _col1 (type: float), 
rank_window_0 (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 21198 Data size: 169584 Basic stats: 
COMPLETE Column stats: NONE
+              Limit
+                Number of rows: 50
+                Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 50 Data size: 400 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 50
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select si, max(f) mf, rank() over ( partition by si order by 
mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select si, max(f) mf, rank() over ( partition by si order by 
mf )
+FROM over10k
+GROUP BY si
+HAVING max(f) > 0
+limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+256    94.87   1
+257    98.0    1
+258    98.19   1
+259    99.71   1
+260    99.78   1
+261    98.09   1
+262    98.41   1
+263    97.32   1
+264    97.65   1
+265    96.18   1
+266    99.41   1
+267    99.8    1
+268    93.34   1
+269    99.24   1
+270    98.57   1
+271    96.02   1
+272    92.82   1
+273    97.51   1
+274    95.43   1
+275    99.68   1
+276    98.94   1
+277    97.26   1
+278    98.56   1
+279    98.09   1
+280    99.21   1
+281    99.32   1
+282    95.49   1
+283    96.46   1
+284    99.34   1
+285    99.34   1
+286    92.77   1
+287    99.29   1
+288    96.71   1
+289    97.13   1
+290    99.88   1
+291    99.18   1
+292    94.99   1
+293    95.36   1
+294    99.34   1
+295    90.67   1
+296    96.85   1
+297    95.62   1
+298    99.98   1
+299    99.36   1
+300    98.76   1
+301    99.08   1
+302    99.84   1
+303    98.57   1
+304    94.68   1
+305    96.1    1
+PREHOOK: query: explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: over10k
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Reduce Output Operator
+              key expressions: si (type: smallint), i (type: int)
+              sort order: ++
+              Map-reduce partition columns: si (type: smallint)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 
(type: int)
+          outputColumnNames: _col1, _col2
+          Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col1: smallint, _col2: int
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: _col2 ASC NULLS FIRST
+                  partition by: _col1
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: rank_window_0
+                        arguments: _col2
+                        name: rank
+                        window function: GenericUDAFRankEvaluator
+                        window frame: PRECEDING(MAX)~FOLLOWING(MAX)
+                        isPivotResult: true
+            Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col1 (type: smallint), rank_window_0 (type: int)
+              outputColumnNames: _col1, rank_window_0
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              Group By Operator
+                keys: _col1 (type: smallint), rank_window_0 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: smallint), _col1 (type: int)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: smallint), _col1 
(type: int)
+              Statistics: Num rows: 127193 Data size: 1017544 Basic stats: 
COMPLETE Column stats: NONE
+              TopN Hash Memory Usage: 0.1
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: smallint), KEY._col1 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 63596 Data size: 508768 Basic stats: COMPLETE 
Column stats: NONE
+          Limit
+            Number of rows: 50
+            Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 50 Data size: 400 Basic stats: COMPLETE 
Column stats: NONE
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 50
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@over10k
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct si, rank() over ( partition by si order by i )
+FROM over10k
+limit 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@over10k
+#### A masked pattern was here ####
+256    1
+256    2
+256    3
+256    4
+256    5
+256    6
+256    7
+256    8
+256    9
+256    10
+256    11
+256    13
+256    14
+256    15
+256    16
+256    17
+256    18
+256    19
+256    20
+256    21
+256    22
+256    23
+256    24
+256    25
+256    26
+256    27
+256    28
+256    29
+256    30
+256    32
+256    33
+256    34
+256    35
+256    37
+257    1
+257    2
+257    3
+257    4
+257    5
+257    6
+257    7
+257    8
+257    9
+257    10
+257    11
+257    12
+257    13
+257    14
+257    16
+257    17

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out 
b/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
index aa34d3d..3434336 100644
--- a/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/cbo_rp_windowing_2.q.out
@@ -1887,8 +1887,9 @@ window w1 as (distribute by p_mfgr sort by p_name rows 
between 2 preceding and 2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part
 #### A masked pattern was here ####
-Manufacturer#1 almond antique burnished rose metallic  2       42
-Manufacturer#1 almond antique chartreuse lavender yellow       34      70
+Manufacturer#1 almond antique burnished rose metallic  2       38
+Manufacturer#1 almond antique burnished rose metallic  2       44
+Manufacturer#1 almond antique chartreuse lavender yellow       34      72
 Manufacturer#1 almond antique salmon chartreuse burlywood      6       112
 Manufacturer#1 almond aquamarine burnished black steel 28      110
 Manufacturer#1 almond aquamarine pink moccasin thistle 42      76

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/llap/windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/windowing.q.out 
b/ql/src/test/results/clientpositive/llap/windowing.q.out
index b17bfc1..190d13b 100644
--- a/ql/src/test/results/clientpositive/llap/windowing.q.out
+++ b/ql/src/test/results/clientpositive/llap/windowing.q.out
@@ -1872,23 +1872,125 @@ Manufacturer#5 almond antique sky peru orange  2       
2       39      108
 Manufacturer#5 almond aquamarine dodger light gainsboro        46      46      
85      77
 Manufacturer#5 almond azure blanched chiffon midnight  23      23      108     
71
 PREHOOK: query: -- 35. testDistinctWithWindowing
+explain
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part
 window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following)
 PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
 POSTHOOK: query: -- 35. testDistinctWithWindowing
+explain
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part
 window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following)
 POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 5798 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: 
string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Statistics: Num rows: 26 Data size: 5798 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    value expressions: p_size (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 12766 Basic stats: 
COMPLETE Column stats: COMPLETE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: PRECEDING(2)~FOLLOWING(2)
+                  Statistics: Num rows: 26 Data size: 12766 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), 
_col5 (type: int), sum_window_0 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 26 Data size: 12766 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string), _col2 
(type: int), _col3 (type: bigint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 13 Data size: 3003 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int), _col3 (type: bigint)
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+                        Statistics: Num rows: 13 Data size: 3003 Basic stats: 
COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int), KEY._col3 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 13 Data size: 3003 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following)
+POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part
 #### A masked pattern was here ####
-Manufacturer#1 almond antique burnished rose metallic  2       42
-Manufacturer#1 almond antique chartreuse lavender yellow       34      70
+Manufacturer#1 almond antique burnished rose metallic  2       38
+Manufacturer#1 almond antique burnished rose metallic  2       44
+Manufacturer#1 almond antique chartreuse lavender yellow       34      72
 Manufacturer#1 almond antique salmon chartreuse burlywood      6       112
 Manufacturer#1 almond aquamarine burnished black steel 28      110
 Manufacturer#1 almond aquamarine pink moccasin thistle 42      76

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/test/results/clientpositive/spark/windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/windowing.q.out 
b/ql/src/test/results/clientpositive/spark/windowing.q.out
index b17bfc1..72b2245 100644
--- a/ql/src/test/results/clientpositive/spark/windowing.q.out
+++ b/ql/src/test/results/clientpositive/spark/windowing.q.out
@@ -1872,23 +1872,120 @@ Manufacturer#5 almond antique sky peru orange  2       
2       39      108
 Manufacturer#5 almond aquamarine dodger light gainsboro        46      46      
85      77
 Manufacturer#5 almond azure blanched chiffon midnight  23      23      108     
71
 PREHOOK: query: -- 35. testDistinctWithWindowing
+explain
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part
 window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following)
 PREHOOK: type: QUERY
-PREHOOK: Input: default@part
-#### A masked pattern was here ####
 POSTHOOK: query: -- 35. testDistinctWithWindowing
+explain
 select DISTINCT p_mfgr, p_name, p_size,
 sum(p_size) over w1 as s
 from part
 window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following)
 POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4)
+        Reducer 3 <- Reducer 2 (GROUP, 4)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: p_mfgr (type: string), p_name (type: 
string)
+                    sort order: ++
+                    Map-reduce partition columns: p_mfgr (type: string)
+                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: p_size (type: int)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
+                outputColumnNames: _col1, _col2, _col5
+                Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE 
Column stats: NONE
+                PTF Operator
+                  Function definitions:
+                      Input definition
+                        input alias: ptf_0
+                        output shape: _col1: string, _col2: string, _col5: int
+                        type: WINDOWING
+                      Windowing table definition
+                        input alias: ptf_1
+                        name: windowingtablefunction
+                        order by: _col1 ASC NULLS FIRST
+                        partition by: _col2
+                        raw input shape:
+                        window functions:
+                            window function definition
+                              alias: sum_window_0
+                              arguments: _col5
+                              name: sum
+                              window function: GenericUDAFSumLong
+                              window frame: PRECEDING(2)~FOLLOWING(2)
+                  Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col2 (type: string), _col1 (type: string), 
_col5 (type: int), sum_window_0 (type: bigint)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string), _col1 (type: string), _col2 
(type: int), _col3 (type: bigint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: int), _col3 (type: bigint)
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int), _col3 (type: bigint)
+                        Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: int), KEY._col3 (type: bigint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select DISTINCT p_mfgr, p_name, p_size,
+sum(p_size) over w1 as s
+from part
+window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 
2 following)
+POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part
 #### A masked pattern was here ####
-Manufacturer#1 almond antique burnished rose metallic  2       42
-Manufacturer#1 almond antique chartreuse lavender yellow       34      70
+Manufacturer#1 almond antique burnished rose metallic  2       38
+Manufacturer#1 almond antique burnished rose metallic  2       44
+Manufacturer#1 almond antique chartreuse lavender yellow       34      72
 Manufacturer#1 almond antique salmon chartreuse burlywood      6       112
 Manufacturer#1 almond aquamarine burnished black steel 28      110
 Manufacturer#1 almond aquamarine pink moccasin thistle 42      76

Reply via email to