HIVE-14396 : CBO: Calcite Operator To Hive Operator (Calcite Return Path): 
TestCliDriver count.q failure (Vineet Garg via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/076b6ccc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/076b6ccc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/076b6ccc

Branch: refs/heads/master
Commit: 076b6cccc3bf3a881d1e15d5f466d2b104b3e23b
Parents: d97e4e2
Author: Vineet Garg <[email protected]>
Authored: Fri Aug 12 14:12:29 2016 -0700
Committer: Ashutosh Chauhan <[email protected]>
Committed: Fri Aug 12 14:13:09 2016 -0700

----------------------------------------------------------------------
 .../ql/optimizer/calcite/HiveCalciteUtil.java   |   7 +-
 .../calcite/translator/HiveGBOpConvUtil.java    | 108 +++----
 ql/src/test/queries/clientpositive/count.q      |  18 ++
 .../clientpositive/groupby_ppr_multi_distinct.q |  17 ++
 ql/src/test/results/clientpositive/count.q.out  | 258 ++++++++++++++++-
 .../groupby_ppr_multi_distinct.q.out            | 255 ++++++++++++++++
 .../results/clientpositive/spark/count.q.out    | 285 +++++++++++++++++-
 .../spark/groupby_ppr_multi_distinct.q.out      | 261 +++++++++++++++++
 .../test/results/clientpositive/tez/count.q.out | 289 ++++++++++++++++++-
 9 files changed, 1398 insertions(+), 100 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 774fc59..c527e58 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -918,8 +918,11 @@ public class HiveCalciteUtil {
       // The following check is only a guard against failures.
       // TODO: Knowing which expr is constant in GBY's aggregation function
       // arguments could be better done using Metadata provider of Calcite.
-      if (exprs != null && index < exprs.size() && exprs.get(index) instanceof 
RexLiteral) {
-        ExprNodeDesc exprNodeDesc = exprConv.visitLiteral((RexLiteral) 
exprs.get(index));
+      // check the corresponding expression in exprs to see if it is a literal
+      if (exprs != null && index < exprs.size() && 
exprs.get(inputRefs.get(index)) instanceof RexLiteral) {
+        // because rexInputRefs represents the ref expr corresponding to the value in 
inputRefs, it is used to get
+        //  the corresponding index
+        ExprNodeDesc exprNodeDesc = exprConv.visitLiteral((RexLiteral) 
exprs.get(inputRefs.get(index)));
         exprNodes.add(exprNodeDesc);
       } else {
         RexNode iRef = rexInputRefs.get(index);

http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
index 25fe059..0f6c5b5 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveGBOpConvUtil.java
@@ -724,7 +724,10 @@ public class HiveGBOpConvUtil {
       outputKeyColumnNames.add(udafName);
       for (int i = 0; i < gbInfo.distExprNodes.size(); i++) {
         reduceKeys.add(gbInfo.distExprNodes.get(i));
-        outputColName = SemanticAnalyzer.getColumnInternalName(i);
+        // this part of reduceKeys is later used to create column names 
strictly for non-distinct aggregates
+        // whose parameters are the same as the distinct keys, which expects _col0 at the 
end. So we always append
+        // _col0 at the end instead of _col<i>
+        outputColName = SemanticAnalyzer.getColumnInternalName(0);
         String field = Utilities.ReduceField.KEY.toString() + "." + udafName + 
":" + i + "."
             + outputColName;
         ColumnInfo colInfo = new ColumnInfo(field, 
gbInfo.distExprNodes.get(i).getTypeInfo(), null,
@@ -1014,8 +1017,8 @@ public class HiveGBOpConvUtil {
     // --grpkey--,--distkey--,--values--
     // but distUDAF may be before/after some non-distUDAF,
     // i.e., their positions can be mixed.
-    // so we first process distUDAF and then non-distUDAF.
-    // But we need to remember the sequence of udafs.
+    // so for each UDAF argument we first check whether it is a group by key; if not, 
whether it is a distinct key;
+    // if neither, it must be a value
     List<Integer> distinctPositions = new ArrayList<>();
     Map<Integer, ArrayList<ExprNodeDesc>> indexToParameter = new TreeMap<>();
     for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
@@ -1025,62 +1028,28 @@ public class HiveGBOpConvUtil {
       ColumnInfo rsUDAFParamColInfo;
       ExprNodeDesc udafParam;
       ExprNodeDesc constantPropDistinctUDAFParam;
-      if (udafAttr.isDistinctUDAF) {
-        // udafAttr.udafParamsIndxInGBInfoDistExprs is not quite useful
-        // because distinctUDAF can also include group by key as an argument.
-        for (int j = 0; j < udafAttr.argList.size(); j++) {
-          int argPos = udafAttr.argList.get(j);
-          if (argPos < gbInfo.gbKeys.size() + distinctPositions.size()) {
-            // distinctUDAF includes group by key as an argument or reuses 
distinct keys.
-            rsUDAFParamColInfo = rsColInfoLst.get(argPos);
-          } else {
-            rsUDAFParamColInfo = rsColInfoLst.get(gbInfo.gbKeys.size() + 
distinctPositions.size());
-            distinctPositions.add(argPos);
-          }
-          String rsDistUDAFParamName = rsUDAFParamColInfo.getInternalName();
-          // TODO: verify if this is needed
-          if (lastReduceKeyColName != null) {
-            rsDistUDAFParamName = Utilities.ReduceField.KEY.name() + "." + 
lastReduceKeyColName
-                + ":" + numDistinctUDFs + "." + 
SemanticAnalyzer.getColumnInternalName(j);
-          }
-
-          udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), 
rsDistUDAFParamName,
-              rsUDAFParamColInfo.getTabAlias(), 
rsUDAFParamColInfo.getIsVirtualCol());
-          constantPropDistinctUDAFParam = SemanticAnalyzer
-              
.isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(),
-                  reduceValues);
-          if (constantPropDistinctUDAFParam != null) {
-            udafParam = constantPropDistinctUDAFParam;
-          }
-          aggParameters.add(udafParam);
+      for (int j = 0; j < udafAttr.udafParams.size(); j++) {
+        int argPos = getColInfoPos(udafAttr.udafParams.get(j), gbInfo);
+        rsUDAFParamColInfo = rsColInfoLst.get(argPos);
+        String rsUDAFParamName = rsUDAFParamColInfo.getInternalName();
+
+        if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) {
+          rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + 
lastReduceKeyColName
+                  + ":" + numDistinctUDFs + "." + 
SemanticAnalyzer.getColumnInternalName(j);
         }
-        indexToParameter.put(i, aggParameters);
-        numDistinctUDFs++;
-      }
-    }
-    for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
-      UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
-      ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
-
-      ColumnInfo rsUDAFParamColInfo;
-      ExprNodeDesc udafParam;
-      ExprNodeDesc constantPropDistinctUDAFParam;
-      if (!udafAttr.isDistinctUDAF) {
-        for (int j = 0; j < udafAttr.udafParams.size(); j++) {
-          int argPos = udafAttr.argList.get(j);
-          rsUDAFParamColInfo = rsColInfoLst.get(argPos + 
getOffSet(distinctPositions, argPos));
-          String rsUDAFParamName = rsUDAFParamColInfo.getInternalName();
-          udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), 
rsUDAFParamName,
-              rsUDAFParamColInfo.getTabAlias(), 
rsUDAFParamColInfo.getIsVirtualCol());
-          constantPropDistinctUDAFParam = SemanticAnalyzer
-              
.isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(),
-                  reduceValues);
-          if (constantPropDistinctUDAFParam != null) {
-            udafParam = constantPropDistinctUDAFParam;
-          }
-          aggParameters.add(udafParam);
+        udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), 
rsUDAFParamName,
+                rsUDAFParamColInfo.getTabAlias(), 
rsUDAFParamColInfo.getIsVirtualCol());
+        constantPropDistinctUDAFParam = SemanticAnalyzer
+                
.isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(),
+                        reduceValues);
+        if (constantPropDistinctUDAFParam != null) {
+          udafParam = constantPropDistinctUDAFParam;
         }
-        indexToParameter.put(i, aggParameters);
+        aggParameters.add(udafParam);
+      }
+      indexToParameter.put(i, aggParameters);
+      if(udafAttr.isDistinctUDAF) {
+        numDistinctUDFs++;
       }
     }
     for(int index : indexToParameter.keySet()){
@@ -1108,14 +1077,27 @@ public class HiveGBOpConvUtil {
     return new OpAttr("", new HashSet<Integer>(), rsGB1);
   }
 
-  private static int getOffSet(List<Integer> distinctPositions, int pos) {
-    int ret = 0;
-    for (int distPos : distinctPositions) {
-      if (distPos > pos) {
-        ret++;
+  private static int getColInfoPos(ExprNodeDesc aggExpr, GBInfo gbInfo ) {
+    // first see if it is a group by key
+    int gbKeyIndex = ExprNodeDescUtils.indexOf(aggExpr, gbInfo.gbKeys);
+    if(gbKeyIndex < 0 )  {
+        // then check whether it is a distinct key
+      int distinctKeyIndex = ExprNodeDescUtils.indexOf(aggExpr, 
gbInfo.distExprNodes);
+      if(distinctKeyIndex < 0) {
+        // lastly it should be in deDupedNonDistIrefs
+        int deDupValIndex = ExprNodeDescUtils.indexOf(aggExpr, 
gbInfo.deDupedNonDistIrefs);
+        assert(deDupValIndex >= 0);
+        return gbInfo.gbKeys.size() + gbInfo.distExprNodes.size() + 
deDupValIndex;
       }
+      else {
+        // aggExpr is part of a distinct key
+        return gbInfo.gbKeys.size() + distinctKeyIndex;
+      }
+
+    }
+    else {
+        return gbKeyIndex;
     }
-    return ret;
   }
 
   @SuppressWarnings("unchecked")

http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/test/queries/clientpositive/count.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/count.q 
b/ql/src/test/queries/clientpositive/count.q
index 41ffaf2..2849d9a 100644
--- a/ql/src/test/queries/clientpositive/count.q
+++ b/ql/src/test/queries/clientpositive/count.q
@@ -21,6 +21,12 @@ select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinc
 
 set hive.cbo.returnpath.hiveop=true;
 
+set hive.map.aggr=true;
+--first aggregation with literal. gbinfo was generating wrong expression
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd;
+select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd;
+
+set hive.map.aggr=false;
 explain select count(distinct b) from abcd group by a;
 select count(distinct b) from abcd group by a;
 
@@ -33,4 +39,16 @@ select count(distinct b) from abcd group by c;
 explain select count(b), count(distinct c) from abcd group by d;
 select count(b), count(distinct c) from abcd group by d;
 
+--non distinct aggregate with same column as group by key
+explain select a, count(distinct b), count(distinct c), sum(d), sum(d+d), 
sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd 
group by a;
+select a, count(distinct b), count(distinct c), sum(d), sum(d+d), sum(d*3), 
sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd group by a;
+
+--non distinct aggregate with same column as distinct aggregate
+explain select a, count(distinct b), count(distinct c), sum(d), sum(c) from 
abcd group by a;
+select a, count(distinct b), count(distinct c), sum(d), sum(c) from abcd group 
by a;
+
+--aggregation with literal
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd;
+select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd;
+
 set hive.cbo.returnpath.hiveop=false;

http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q 
b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
index 74bd2fd..357ab95 100644
--- a/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
+++ b/ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct.q
@@ -20,3 +20,20 @@ WHERE src.ds = '2008-04-08'
 GROUP BY substr(src.key,1,1);
 
 SELECT dest1.* FROM dest1;
+
+set hive.cbo.returnpath.hiveop=true;
+EXPLAIN EXTENDED
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1);
+
+SELECT dest1.* FROM dest1;
+set hive.cbo.returnpath.hiveop=false;

http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/test/results/clientpositive/count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/count.q.out 
b/ql/src/test/results/clientpositive/count.q.out
index c950c5b..641da27 100644
--- a/ql/src/test/results/clientpositive/count.q.out
+++ b/ql/src/test/results/clientpositive/count.q.out
@@ -264,6 +264,67 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
 7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4
+PREHOOK: query: --first aggregation with literal. gbinfo was generating wrong 
expression
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+POSTHOOK: query: --first aggregation with literal. gbinfo was generating wrong 
expression
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), c (type: int), d 
(type: int)
+              outputColumnNames: $f1, $f2, $f3, $f4
+              Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: count(1), count(), count($f1), count($f2), 
count($f3), count($f4), count(DISTINCT $f1), count(DISTINCT $f2), 
count(DISTINCT $f3), count(DISTINCT $f4), count(DISTINCT $f1, $f2), 
count(DISTINCT $f2, $f3), count(DISTINCT $f3, $f4), count(DISTINCT $f1, $f4), 
count(DISTINCT $f1, $f3), count(DISTINCT $f2, $f4), count(DISTINCT $f1, $f2, 
$f3), count(DISTINCT $f2, $f3, $f4), count(DISTINCT $f1, $f3, $f4), 
count(DISTINCT $f1, $f2, $f4), count(DISTINCT $f1, $f2, $f3, $f4)
+                keys: $f1 (type: int), $f2 (type: int), $f3 (type: int), $f4 
(type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: int), _col3 (type: int)
+                  sort order: ++++
+                  Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col4 (type: bigint), _col5 (type: 
bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), 
_col9 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), count(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), 
count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT 
KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, 
KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), 
count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT 
KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, 
KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, 
KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, 
KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, 
KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, 
KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:
 14._col3)
+          mode: mergepartial
+          outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, 
$f10, $f11, $f12, $f13, $f14, $f15, $f16, $f17, $f18, $f19, $f20
+          Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4
 PREHOOK: query: explain select count(distinct b) from abcd group by a
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(distinct b) from abcd group by a
@@ -464,30 +525,31 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: abcd
-            Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE Column 
stats: NONE
             Select Operator
-              expressions: c (type: int), d (type: int)
-              outputColumnNames: c, d
-              Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+              expressions: b (type: int), c (type: int), d (type: int)
+              outputColumnNames: b, c, d
+              Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
               Reduce Output Operator
                 key expressions: d (type: int), c (type: int)
                 sort order: ++
                 Map-reduce partition columns: d (type: int)
-                Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                value expressions: b (type: int)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(KEY._col1:0._col0), count(DISTINCT 
KEY._col1:0._col0)
+          aggregations: count(VALUE._col0), count(DISTINCT KEY._col1:0._col0)
           keys: KEY._col0 (type: int)
           mode: complete
           outputColumnNames: d, $f1, $f2
-          Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column 
stats: NONE
+          Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column 
stats: NONE
           Select Operator
             expressions: $f1 (type: bigint), $f2 (type: bigint)
             outputColumnNames: _o__c0, _o__c1
-            Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE Column 
stats: NONE
+            Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column 
stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE 
Column stats: NONE
               table:
                   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -507,10 +569,184 @@ POSTHOOK: query: select count(b), count(distinct c) from 
abcd group by d
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
-0      0
-1      1
+0      1
+1      0
 1      1
 1      1
 1      1
 1      1
 1      1
+PREHOOK: query: --non distinct aggregate with same column as group by key
+explain select a, count(distinct b), count(distinct c), sum(d), sum(d+d), 
sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd 
group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: --non distinct aggregate with same column as group by key
+explain select a, count(distinct b), count(distinct c), sum(d), sum(d+d), 
sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd 
group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), c (type: int), d 
(type: int), (d + d) (type: int), (d * 3) (type: int)
+              outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5
+              Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: $f0 (type: int), $f1 (type: int), $f2 (type: 
int)
+                sort order: +++
+                Map-reduce partition columns: $f0 (type: int)
+                Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                value expressions: $f3 (type: int), $f4 (type: int), $f5 
(type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT 
KEY._col1:1._col0), sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), 
sum(KEY._col1:0._col0), sum(KEY._col1:1._col0), sum(KEY._col0), sum(DISTINCT 
KEY._col1:2._col0), sum(DISTINCT KEY._col1:3._col0)
+          keys: KEY._col0 (type: int)
+          mode: complete
+          outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, 
$f10
+          Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d), 
sum(d+d), sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) 
from abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d), 
sum(d+d), sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) 
from abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     2       2       10      20      30      1200    95      30      10      
1100
+100    1       1       3       6       9       100     10      100     100     
100
+12     1       2       9       18      27      100     155     24      12      
100
+NULL   1       1       6       12      18      35      23      NULL    NULL    
35
+PREHOOK: query: --non distinct aggregate with same column as distinct aggregate
+explain select a, count(distinct b), count(distinct c), sum(d), sum(c) from 
abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: --non distinct aggregate with same column as distinct 
aggregate
+explain select a, count(distinct b), count(distinct c), sum(d), sum(c) from 
abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), c (type: int), d 
(type: int)
+              outputColumnNames: a, b, c, d
+              Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: a (type: int), b (type: int), c (type: int)
+                sort order: +++
+                Map-reduce partition columns: a (type: int)
+                Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                value expressions: d (type: int)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT 
KEY._col1:1._col0), sum(VALUE._col0), sum(KEY._col1:1._col0)
+          keys: KEY._col0 (type: int)
+          mode: complete
+          outputColumnNames: a, $f1, $f2, $f3, $f4
+          Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d), sum(c) 
from abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d), 
sum(c) from abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     2       2       10      95
+100    1       1       3       10
+12     1       2       9       155
+NULL   1       1       6       23
+PREHOOK: query: --aggregation with literal
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+POSTHOOK: query: --aggregation with literal
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), c (type: int), d 
(type: int)
+              outputColumnNames: $f1, $f2, $f3, $f4
+              Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: $f1 (type: int), $f2 (type: int), $f3 (type: 
int), $f4 (type: int)
+                sort order: ++++
+                Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(1), count(), count(KEY._col0:0._col0), 
count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), 
count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT 
KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, 
KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), 
count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT 
KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, 
KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, 
KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, 
KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, 
KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, 
KEY._col0:14._col1, KEY._col0:14._col2, KEY._co
 l0:14._col3)
+          mode: complete
+          outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, 
$f10, $f11, $f12, $f13, $f14, $f15, $f16, $f17, $f18, $f19, $f20
+          Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4

http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out 
b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
index 33d1ed0..6595196 100644
--- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
@@ -265,3 +265,258 @@ POSTHOOK: Input: default@dest1
 7      6       71470.0 447     6
 8      8       81524.0 595     8
 9      7       92094.0 577     7
+PREHOOK: query: EXPLAIN EXTENDED
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE
+            GatherStats: false
+            Select Operator
+              expressions: substr(key, 1, 1) (type: string), substr(value, 5) 
(type: string), value (type: string)
+              outputColumnNames: $f0, $f1, $f2
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: $f0 (type: string), $f1 (type: string), $f2 
(type: string)
+                null sort order: aaa
+                sort order: +++
+                Map-reduce partition columns: $f0 (type: string)
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+                tag: -1
+                auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=11
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 11
+            properties:
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+#### A masked pattern was here ####
+          Partition
+            base file name: hr=12
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2008-04-08
+              hr 12
+            properties:
+              COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+              bucket_count -1
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.srcpart
+              numFiles 1
+              numRows 500
+              partition_columns ds/hr
+              partition_columns.types string:string
+              rawDataSize 5312
+              serialization.ddl struct srcpart { string key, string value}
+              serialization.format 1
+              serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.srcpart
+                partition_columns ds/hr
+                partition_columns.types string:string
+                serialization.ddl struct srcpart { string key, string value}
+                serialization.format 1
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.srcpart
+            name: default.srcpart
+      Truncated Path -> Alias:
+        /srcpart/ds=2008-04-08/hr=11 [src]
+        /srcpart/ds=2008-04-08/hr=12 [src]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), 
sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(DISTINCT 
KEY._col1:2._col0)
+          keys: KEY._col0 (type: string)
+          mode: complete
+          outputColumnNames: $f0, $f1, $f2, $f3, $f4
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: $f0 (type: string), UDFToInteger($f1) (type: int), 
concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), 
UDFToInteger($f4) (type: int)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                    bucket_count -1
+                    columns key,c1,c2,c3,c4
+                    columns.comments 
+                    columns.types string:int:string:int:int
+#### A masked pattern was here ####
+                    name default.dest1
+                    numFiles 1
+                    numRows 10
+                    rawDataSize 184
+                    serialization.ddl struct dest1 { string key, i32 c1, 
string c2, i32 c3, i32 c4}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 194
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.dest1
+              TotalFiles: 1
+              GatherStats: true
+              MultiFileSpray: false
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+#### A masked pattern was here ####
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                bucket_count -1
+                columns key,c1,c2,c3,c4
+                columns.comments 
+                columns.types string:int:string:int:int
+#### A masked pattern was here ####
+                name default.dest1
+                numFiles 1
+                numRows 10
+                rawDataSize 184
+                serialization.ddl struct dest1 { string key, i32 c1, string 
c2, i32 c3, i32 c4}
+                serialization.format 1
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 194
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, 
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, 
type:string, comment:default), (srcpart)src.FieldSchema(name:value, 
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.FieldSchema(name:value, 
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.null, ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, 
type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0      1       00.0    0       1
+1      71      132828.0        10044   71
+2      69      251142.0        15780   69
+3      62      364008.0        20119   62
+4      74      4105526.0       30965   74
+5      6       5794.0  278     6
+6      5       6796.0  331     5
+7      6       71470.0 447     6
+8      8       81524.0 595     8
+9      7       92094.0 577     7

http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/test/results/clientpositive/spark/count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/count.q.out 
b/ql/src/test/results/clientpositive/spark/count.q.out
index b1ad662..06f7235 100644
--- a/ql/src/test/results/clientpositive/spark/count.q.out
+++ b/ql/src/test/results/clientpositive/spark/count.q.out
@@ -288,6 +288,73 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
 7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4
+PREHOOK: query: --first aggregation with literal. gbinfo was generating wrong 
expression
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+POSTHOOK: query: --first aggregation with literal. gbinfo was generating wrong 
expression
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
+                    outputColumnNames: $f1, $f2, $f3, $f4
+                    Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1), count(), count($f1), count($f2), 
count($f3), count($f4), count(DISTINCT $f1), count(DISTINCT $f2), 
count(DISTINCT $f3), count(DISTINCT $f4), count(DISTINCT $f1, $f2), 
count(DISTINCT $f2, $f3), count(DISTINCT $f3, $f4), count(DISTINCT $f1, $f4), 
count(DISTINCT $f1, $f3), count(DISTINCT $f2, $f4), count(DISTINCT $f1, $f2, 
$f3), count(DISTINCT $f2, $f3, $f4), count(DISTINCT $f1, $f3, $f4), 
count(DISTINCT $f1, $f2, $f4), count(DISTINCT $f1, $f2, $f3, $f4)
+                      keys: $f1 (type: int), $f2 (type: int), $f3 (type: int), 
$f4 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, 
_col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                      Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int)
+                        sort order: ++++
+                        Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col4 (type: bigint), _col5 (type: 
bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), 
_col9 (type: bigint)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), 
count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT 
KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, 
KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), 
count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT 
KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, 
KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, 
KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, 
KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, 
KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, 
KEY._col0:14._col1, KEY._col0:14._col2, KEY.
 _col0:14._col3)
+                mode: mergepartial
+                outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, 
$f8, $f9, $f10, $f11, $f12, $f13, $f14, $f15, $f16, $f17, $f18, $f19, $f20
+                Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4
 PREHOOK: query: explain select count(distinct b) from abcd group by a
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select count(distinct b) from abcd group by a
@@ -491,10 +558,11 @@ STAGE PLANS:
                   sort order: +
                   Map-reduce partition columns: c (type: int)
                   Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: b (type: int)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
-                aggregations: count(KEY._col0)
+                aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: int)
                 mode: complete
                 outputColumnNames: c, $f1
@@ -551,31 +619,32 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: abcd
-                  Statistics: Num rows: 9 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
                   Select Operator
-                    expressions: c (type: int), d (type: int)
-                    outputColumnNames: c, d
-                    Statistics: Num rows: 9 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                    expressions: b (type: int), c (type: int), d (type: int)
+                    outputColumnNames: b, c, d
+                    Statistics: Num rows: 6 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: d (type: int), c (type: int)
                       sort order: ++
                       Map-reduce partition columns: d (type: int)
-                      Statistics: Num rows: 9 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: b (type: int)
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
-                aggregations: count(KEY._col1:0._col0), count(DISTINCT 
KEY._col1:0._col0)
+                aggregations: count(VALUE._col0), count(DISTINCT 
KEY._col1:0._col0)
                 keys: KEY._col0 (type: int)
                 mode: complete
                 outputColumnNames: d, $f1, $f2
-                Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: $f1 (type: bigint), $f2 (type: bigint)
                   outputColumnNames: _o__c0, _o__c1
-                  Statistics: Num rows: 4 Data size: 34 Basic stats: COMPLETE 
Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 4 Data size: 34 Basic stats: 
COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 39 Basic stats: 
COMPLETE Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -595,10 +664,202 @@ POSTHOOK: query: select count(b), count(distinct c) from 
abcd group by d
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@abcd
 #### A masked pattern was here ####
-0      0
-1      1
+0      1
+1      0
 1      1
 1      1
 1      1
 1      1
 1      1
+PREHOOK: query: --non distinct aggregate with same column as group by key
+explain select a, count(distinct b), count(distinct c), sum(d), sum(d+d), 
sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd 
group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: --non distinct aggregate with same column as group by key
+explain select a, count(distinct b), count(distinct c), sum(d), sum(d+d), 
sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) from abcd 
group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int), c (type: int), 
d (type: int), (d + d) (type: int), (d * 3) (type: int)
+                    outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5
+                    Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: $f0 (type: int), $f1 (type: int), $f2 
(type: int)
+                      sort order: +++
+                      Map-reduce partition columns: $f0 (type: int)
+                      Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: $f3 (type: int), $f4 (type: int), $f5 
(type: int)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), 
count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0), sum(VALUE._col1), 
sum(VALUE._col2), sum(KEY._col1:0._col0), sum(KEY._col1:1._col0), 
sum(KEY._col0), sum(DISTINCT KEY._col1:2._col0), sum(DISTINCT KEY._col1:3._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, 
$f8, $f9, $f10
+                Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d), 
sum(d+d), sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) 
from abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d), 
sum(d+d), sum(d*3), sum(b), sum(c), sum(a), sum(distinct a), sum(distinct b) 
from abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     2       2       10      20      30      1200    95      30      10      
1100
+100    1       1       3       6       9       100     10      100     100     
100
+12     1       2       9       18      27      100     155     24      12      
100
+NULL   1       1       6       12      18      35      23      NULL    NULL    
35
+PREHOOK: query: --non distinct aggregate with same column as distinct aggregate
+explain select a, count(distinct b), count(distinct c), sum(d), sum(c) from 
abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: --non distinct aggregate with same column as distinct 
aggregate
+explain select a, count(distinct b), count(distinct c), sum(d), sum(c) from 
abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
+                    outputColumnNames: a, b, c, d
+                    Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: a (type: int), b (type: int), c (type: 
int)
+                      sort order: +++
+                      Map-reduce partition columns: a (type: int)
+                      Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: d (type: int)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), 
count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0), sum(KEY._col1:1._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: a, $f1, $f2, $f3, $f4
+                Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d), sum(c) 
from abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d), 
sum(c) from abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     2       2       10      95
+100    1       1       3       10
+12     1       2       9       155
+NULL   1       1       6       23
+PREHOOK: query: --aggregation with literal
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+POSTHOOK: query: --aggregation with literal
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
+                    outputColumnNames: $f1, $f2, $f3, $f4
+                    Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: $f1 (type: int), $f2 (type: int), $f3 
(type: int), $f4 (type: int)
+                      sort order: ++++
+                      Statistics: Num rows: 4 Data size: 78 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(1), count(), count(KEY._col0:0._col0), 
count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), 
count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT 
KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, 
KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), 
count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT 
KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, 
KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, 
KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, 
KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, 
KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, 
KEY._col0:14._col1, KEY._col0:14._col2, K
 EY._col0:14._col3)
+                mode: complete
+                outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, 
$f8, $f9, $f10, $f11, $f12, $f13, $f14, $f15, $f16, $f17, $f18, $f19, $f20
+                Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4

http://git-wip-us.apache.org/repos/asf/hive/blob/076b6ccc/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out 
b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
index 5251241..7d2f9c3 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out
@@ -271,3 +271,264 @@ POSTHOOK: Input: default@dest1
 7      6       71470.0 447     6
 8      8       81524.0 595     8
 9      7       92094.0 577     7
+PREHOOK: query: EXPLAIN EXTENDED
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED
+FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), 
substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: $f0, $f1, $f2
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: $f0 (type: string), $f1 (type: string), 
$f2 (type: string)
+                      null sort order: aaa
+                      sort order: +++
+                      Map-reduce partition columns: $f0 (type: string)
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE
+                      tag: -1
+                      auto parallelism: false
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string 
value}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string 
value}
+                      serialization.format 1
+                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=12
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 12
+                  properties:
+                    COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string 
value}
+                    serialization.format 1
+                    serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string 
value}
+                      serialization.format 1
+                      serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+            Truncated Path -> Alias:
+              /srcpart/ds=2008-04-08/hr=11 [src]
+              /srcpart/ds=2008-04-08/hr=12 [src]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), 
sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(DISTINCT 
KEY._col1:2._col0)
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: $f0, $f1, $f2, $f3, $f4
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: $f0 (type: string), UDFToInteger($f1) (type: 
int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), 
UDFToInteger($f4) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                          bucket_count -1
+                          columns key,c1,c2,c3,c4
+                          columns.comments 
+                          columns.types string:int:string:int:int
+#### A masked pattern was here ####
+                          name default.dest1
+                          numFiles 2
+                          numRows 10
+                          rawDataSize 184
+                          serialization.ddl struct dest1 { string key, i32 c1, 
string c2, i32 c3, i32 c4}
+                          serialization.format 1
+                          serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          totalSize 194
+#### A masked pattern was here ####
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+                    TotalFiles: 1
+                    GatherStats: true
+                    MultiFileSpray: false
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+#### A masked pattern was here ####
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                bucket_count -1
+                columns key,c1,c2,c3,c4
+                columns.comments 
+                columns.types string:int:string:int:int
+#### A masked pattern was here ####
+                name default.dest1
+                numFiles 2
+                numRows 10
+                rawDataSize 184
+                serialization.ddl struct dest1 { string key, i32 c1, string 
c2, i32 c3, i32 c4}
+                serialization.format 1
+                serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 194
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+#### A masked pattern was here ####
+
+PREHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src
+INSERT OVERWRITE TABLE dest1
+SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), 
concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(DISTINCT src.value)
+WHERE src.ds = '2008-04-08'
+GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, 
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, 
type:string, comment:default), (srcpart)src.FieldSchema(name:value, 
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.FieldSchema(name:value, 
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.null, ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, 
type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0      1       00.0    0       1
+1      71      132828.0        10044   71
+2      69      251142.0        15780   69
+3      62      364008.0        20119   62
+4      74      4105526.0       30965   74
+5      6       5794.0  278     6
+6      5       6796.0  331     5
+7      6       71470.0 447     6
+8      8       81524.0 595     8
+9      7       92094.0 577     7

Reply via email to