[1/2] hive git commit: HIVE-13713: We miss vectorization in a case of count(*) when aggregation mode is COMPLETE (Matt McCline, reviewed by Sergey Shelukhin)

mmccline Mon, 30 May 2016 16:59:13 -0700

Repository: hive
Updated Branches:
  refs/heads/master d74d5637f -> 8136a10c1



http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
index 451947b..f5b5d9d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
@@ -50,6 +50,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -84,6 +86,7 @@ public class TestVectorGroupByOperator {
   private static AggregationDesc buildAggregationDesc(
       VectorizationContext ctx,
       String aggregate,
+      GenericUDAFEvaluator.Mode mode,
       String column,
       TypeInfo typeInfo) {
 
@@ -94,6 +97,7 @@ public class TestVectorGroupByOperator {
 
     AggregationDesc agg = new AggregationDesc();
     agg.setGenericUDAFName(aggregate);
+    agg.setMode(mode);
     agg.setParameters(params);
 
     return agg;
@@ -102,6 +106,7 @@ public class TestVectorGroupByOperator {
       VectorizationContext ctx) {
     AggregationDesc agg = new AggregationDesc();
     agg.setGenericUDAFName("COUNT");
+    agg.setMode(GenericUDAFEvaluator.Mode.PARTIAL1);
     agg.setParameters(new ArrayList<ExprNodeDesc>());
     return agg;
   }
@@ -110,10 +115,11 @@ public class TestVectorGroupByOperator {
   private static GroupByDesc buildGroupByDescType(
       VectorizationContext ctx,
       String aggregate,
+      GenericUDAFEvaluator.Mode mode,
       String column,
       TypeInfo dataType) {
 
-    AggregationDesc agg = buildAggregationDesc(ctx, aggregate,
+    AggregationDesc agg = buildAggregationDesc(ctx, aggregate, mode,
         column, dataType);
     ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
     aggs.add(agg);
@@ -124,6 +130,7 @@ public class TestVectorGroupByOperator {
     GroupByDesc desc = new GroupByDesc();
     desc.setOutputColumnNames(outputColumnNames);
     desc.setAggregators(aggs);
+    desc.getVectorDesc().setProcessingMode(ProcessingMode.GLOBAL);
 
     return desc;
   }
@@ -154,7 +161,8 @@ public class TestVectorGroupByOperator {
       String key,
       TypeInfo keyTypeInfo) {
 
-    GroupByDesc desc = buildGroupByDescType(ctx, aggregate, column, dataTypeInfo);
+    GroupByDesc desc = buildGroupByDescType(ctx, aggregate, GenericUDAFEvaluator.Mode.PARTIAL1, column, dataTypeInfo);
+    desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH);
 
     ExprNodeDesc keyExp = buildColumnDesc(ctx, key, keyTypeInfo);
     ArrayList<ExprNodeDesc> keys = new ArrayList<ExprNodeDesc>();
@@ -1716,7 +1724,7 @@ public class TestVectorGroupByOperator {
 
     ArrayList<AggregationDesc> aggs = new ArrayList(1);
     aggs.add(
-        buildAggregationDesc(ctx, aggregateName,
+        buildAggregationDesc(ctx, aggregateName, 
GenericUDAFEvaluator.Mode.PARTIAL1,
             "value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
 
     for(i=0; i<columnTypes.length - 1; ++i) {
@@ -1730,6 +1738,7 @@ public class TestVectorGroupByOperator {
     desc.setOutputColumnNames(outputColumnNames);
     desc.setAggregators(aggs);
     desc.setKeys(keysDesc);
+    desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH);
 
     CompilationOpContext cCtx = new CompilationOpContext();
     VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc);
@@ -1827,7 +1836,7 @@ public class TestVectorGroupByOperator {
     VectorizationContext ctx = new VectorizationContext("name", 
mapColumnNames);
     Set<Object> keys = new HashSet<Object>();
 
-    AggregationDesc agg = buildAggregationDesc(ctx, aggregateName,
+    AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, 
GenericUDAFEvaluator.Mode.PARTIAL1,
         "Value", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[1]));
     ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
     aggs.add(agg);
@@ -1839,6 +1848,7 @@ public class TestVectorGroupByOperator {
     GroupByDesc desc = new GroupByDesc();
     desc.setOutputColumnNames(outputColumnNames);
     desc.setAggregators(aggs);
+    desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH);
 
     ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key",
         TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0]));
@@ -2242,6 +2252,7 @@ public class TestVectorGroupByOperator {
     VectorizationContext ctx = new VectorizationContext("name", 
mapColumnNames);
 
     GroupByDesc desc = buildGroupByDescCountStar (ctx);
+    desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH);
 
     CompilationOpContext cCtx = new CompilationOpContext();
     VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc);
@@ -2271,9 +2282,9 @@ public class TestVectorGroupByOperator {
     mapColumnNames.add("A");
     VectorizationContext ctx = new VectorizationContext("name", 
mapColumnNames);
 
-    GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", 
TypeInfoFactory.longTypeInfo);
+    GroupByDesc desc = buildGroupByDescType(ctx, "count", 
GenericUDAFEvaluator.Mode.FINAL, "A", TypeInfoFactory.longTypeInfo);
     VectorGroupByDesc vectorDesc = desc.getVectorDesc();
-    vectorDesc.setIsReduceMergePartial(true);
+    vectorDesc.setProcessingMode(ProcessingMode.GLOBAL);  // Use GLOBAL when no key for Reduce.
     CompilationOpContext cCtx = new CompilationOpContext();
     VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc);
 
@@ -2303,7 +2314,7 @@ public class TestVectorGroupByOperator {
     mapColumnNames.add("A");
     VectorizationContext ctx = new VectorizationContext("name", 
mapColumnNames);
 
-    GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A",
+    GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, 
GenericUDAFEvaluator.Mode.PARTIAL1, "A",
         TypeInfoFactory.stringTypeInfo);
 
     CompilationOpContext cCtx = new CompilationOpContext();
@@ -2336,7 +2347,7 @@ public class TestVectorGroupByOperator {
           VectorizationContext ctx = new VectorizationContext("name", 
mapColumnNames);
 
     GroupByDesc desc =
-        buildGroupByDescType(ctx, aggregateName, "A", 
TypeInfoFactory.getDecimalTypeInfo(30, 4));
+        buildGroupByDescType(ctx, aggregateName, 
GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 
4));
 
     CompilationOpContext cCtx = new CompilationOpContext();
     VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc);
@@ -2368,7 +2379,7 @@ public class TestVectorGroupByOperator {
     mapColumnNames.add("A");
     VectorizationContext ctx = new VectorizationContext("name", 
mapColumnNames);
 
-    GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A",
+    GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, 
GenericUDAFEvaluator.Mode.PARTIAL1, "A",
         TypeInfoFactory.doubleTypeInfo);
 
     CompilationOpContext cCtx = new CompilationOpContext();
@@ -2400,7 +2411,7 @@ public class TestVectorGroupByOperator {
     mapColumnNames.add("A");
     VectorizationContext ctx = new VectorizationContext("name", 
mapColumnNames);
 
-    GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", 
TypeInfoFactory.longTypeInfo);
+    GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, 
GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo);
 
     CompilationOpContext cCtx = new CompilationOpContext();
     VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc);

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
index 9d4ca76..3295372 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
@@ -34,6 +34,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUD
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncAbsLongToLong;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.*;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
 import org.apache.hadoop.hive.ql.udf.generic.*;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -107,6 +108,7 @@ public class TestVectorizer {
 
     GroupByOperator gbyOp = new GroupByOperator(new CompilationOpContext());
     gbyOp.setConf(desc);
+    desc.setMode(GroupByDesc.Mode.HASH);
 
     Vectorizer v = new Vectorizer();
     Assert.assertTrue(v.validateMapWorkOperator(gbyOp, null, false));
@@ -148,9 +150,9 @@ public class TestVectorizer {
     Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, 
VectorExpressionDescriptor.Mode.FILTER));
     Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, 
VectorExpressionDescriptor.Mode.PROJECTION));
   }
- 
+
   /**
-  * prepareAbstractMapJoin prepares a join operator descriptor, used as helper 
by SMB and Map join tests. 
+  * prepareAbstractMapJoin prepares a join operator descriptor, used as helper 
by SMB and Map join tests.
   */
   private void prepareAbstractMapJoin(AbstractMapJoinOperator<? extends 
MapJoinDesc> map, MapJoinDesc mjdesc) {
       mjdesc.setPosBigTable(0);
@@ -189,15 +191,15 @@ public class TestVectorizer {
   public void testValidateMapJoinOperator() {
     MapJoinOperator map = new MapJoinOperator(new CompilationOpContext());
     MapJoinDesc mjdesc = new MapJoinDesc();
-    
+
     prepareAbstractMapJoin(map, mjdesc);
     map.setConf(mjdesc);
- 
+
     Vectorizer vectorizer = new Vectorizer();
     Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false));
   }
 
-  
+
   /**
   * testValidateSMBJoinOperator validates that the SMB join operator can be 
vectorized.
   */
@@ -205,11 +207,11 @@ public class TestVectorizer {
   public void testValidateSMBJoinOperator() {
       SMBMapJoinOperator map = new SMBMapJoinOperator(new 
CompilationOpContext());
       SMBJoinDesc mjdesc = new SMBJoinDesc();
-      
+
       prepareAbstractMapJoin(map, mjdesc);
       map.setConf(mjdesc);
-    
+
       Vectorizer vectorizer = new Vectorizer();
-      Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); 
+      Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false));
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/queries/clientpositive/vector_count.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_count.q 
b/ql/src/test/queries/clientpositive/vector_count.q
new file mode 100644
index 0000000..341db74
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_count.q
@@ -0,0 +1,26 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+-- SORT_QUERY_RESULTS
+
+create table abcd_txt (a int, b int, c int, d int);
+LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt;
+
+create table abcd stored as orc as select * from abcd_txt;
+
+select * from abcd;
+set hive.map.aggr=true;
+explain select a, count(distinct b), count(distinct c), sum(d) from abcd group 
by a;
+select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;
+
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd;
+select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd;
+
+set hive.map.aggr=false;
+explain select a, count(distinct b), count(distinct c), sum(d) from abcd group 
by a;
+select a, count(distinct b), count(distinct c), sum(d) from abcd group by a;
+
+explain select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd;
+select count(1), count(*), count(a), count(b), count(c), count(d), 
count(distinct a), count(distinct b), count(distinct c), count(distinct d), 
count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct 
a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), 
count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), 
count(distinct a,b,c,d) from abcd;

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/queries/clientpositive/vector_groupby4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby4.q 
b/ql/src/test/queries/clientpositive/vector_groupby4.q
new file mode 100644
index 0000000..a59d1a8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby4.q
@@ -0,0 +1,23 @@
+set hive.explain.user=false;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.mapred.mode=nonstrict;
+set hive.map.aggr=false;
+set hive.groupby.skewindata=true;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC;
+
+EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1);
+
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1);
+
+SELECT dest1.* FROM dest1;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/queries/clientpositive/vector_groupby6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby6.q 
b/ql/src/test/queries/clientpositive/vector_groupby6.q
new file mode 100644
index 0000000..89c7a19
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby6.q
@@ -0,0 +1,24 @@
+set hive.explain.user=false;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.mapred.mode=nonstrict;
+set hive.map.aggr=false;
+set hive.groupby.skewindata=true;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC;
+
+EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1);
+
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1);
+
+SELECT dest1.* FROM dest1;
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out b/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out
index ab627b5..3bbbb46 100644
--- a/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out
@@ -47,11 +47,11 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_7]
-        Group By Operator [GBY_12] (rows=1 width=88)
+      File Output Operator [FS_14]
+        Group By Operator [GBY_13] (rows=1 width=88)
           
Output:["_col0","_col1"],aggregations:["max(VALUE._col0)","max(VALUE._col1)"]
         <-Map 1 [SIMPLE_EDGE] vectorized
-          SHUFFLE [RS_4]
+          SHUFFLE [RS_12]
             Group By Operator [GBY_11] (rows=1 width=88)
               Output:["_col0","_col1"],aggregations:["max(dt)","max(greg_dt)"]
               Select Operator [SEL_10] (rows=3 width=102)

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
index 67ddd9e..d37dc51 100644
--- a/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out
@@ -64,7 +64,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_14]
+      File Output Operator [FS_22]
         Group By Operator [GBY_21] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Map 1 [SIMPLE_EDGE]
@@ -134,11 +134,11 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 3 vectorized
-      File Output Operator [FS_19]
-        Group By Operator [GBY_29] (rows=1 width=8)
+      File Output Operator [FS_31]
+        Group By Operator [GBY_30] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Reducer 2 [SIMPLE_EDGE] vectorized
-          SHUFFLE [RS_16]
+          SHUFFLE [RS_29]
             Group By Operator [GBY_28] (rows=1 width=8)
               Output:["_col0"],aggregations:["count()"]
               Select Operator [SEL_27] (rows=5 width=93)
@@ -365,7 +365,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_14]
+      File Output Operator [FS_22]
         Group By Operator [GBY_21] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Map 1 [SIMPLE_EDGE]
@@ -444,7 +444,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_14]
+      File Output Operator [FS_22]
         Group By Operator [GBY_21] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Map 1 [SIMPLE_EDGE]
@@ -547,7 +547,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_14]
+      File Output Operator [FS_22]
         Group By Operator [GBY_21] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Map 1 [SIMPLE_EDGE]
@@ -640,7 +640,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_14]
+      File Output Operator [FS_22]
         Group By Operator [GBY_21] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Map 1 [SIMPLE_EDGE]
@@ -710,7 +710,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 3 vectorized
-      File Output Operator [FS_14]
+      File Output Operator [FS_29]
         Group By Operator [GBY_28] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Reducer 2 [SIMPLE_EDGE]
@@ -781,7 +781,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_14]
+      File Output Operator [FS_22]
         Group By Operator [GBY_21] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Map 1 [SIMPLE_EDGE]
@@ -852,7 +852,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_18]
+      File Output Operator [FS_32]
         Group By Operator [GBY_31] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Map 1 [SIMPLE_EDGE]
@@ -945,7 +945,7 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_14]
+      File Output Operator [FS_22]
         Group By Operator [GBY_21] (rows=1 width=8)
           Output:["_col0"],aggregations:["count(VALUE._col0)"]
         <-Map 1 [SIMPLE_EDGE]

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_count.q.out 
b/ql/src/test/results/clientpositive/tez/vector_count.q.out
new file mode 100644
index 0000000..c854515
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_count.q.out
@@ -0,0 +1,314 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table abcd_txt (a int, b int, c int, d int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@abcd_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table abcd_txt (a int, b int, c int, d int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@abcd_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE 
abcd_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@abcd_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE 
abcd_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@abcd_txt
+PREHOOK: query: create table abcd stored as orc as select * from abcd_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@abcd_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@abcd
+POSTHOOK: query: create table abcd stored as orc as select * from abcd_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@abcd_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@abcd
+POSTHOOK: Lineage: abcd.a SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:a, 
type:int, comment:null), ]
+POSTHOOK: Lineage: abcd.b SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:b, 
type:int, comment:null), ]
+POSTHOOK: Lineage: abcd.c SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:c, 
type:int, comment:null), ]
+POSTHOOK: Lineage: abcd.d SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:d, 
type:int, comment:null), ]
+PREHOOK: query: select * from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select * from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     100     45      4
+10     100     NULL    5
+10     1000    50      1
+100    100     10      3
+12     100     75      7
+12     NULL    80      2
+NULL   35      23      6
+PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) 
from abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a, count(distinct b), count(distinct c), 
sum(d) from abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
+                    outputColumnNames: a, b, c, d
+                    Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT b), count(DISTINCT c), 
sum(d)
+                      keys: a (type: int), b (type: int), c (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+                      Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col5 (type: bigint)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), 
count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from 
abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from 
abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     2       2       10
+100    1       1       3
+12     1       2       9
+NULL   1       1       6
+PREHOOK: query: explain select count(1), count(*), count(a), count(b), 
count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1), count(*), count(a), count(b), 
count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
+                    outputColumnNames: _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1), count(), count(_col1), 
count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT 
_col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, 
_col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), 
count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT 
_col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, 
_col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, 
_col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4)
+                      keys: _col1 (type: int), _col2 (type: int), _col3 (type: 
int), _col4 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, 
_col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                      Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: int)
+                        sort order: ++++
+                        Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col4 (type: bigint), _col5 (type: 
bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), 
_col9 (type: bigint)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), 
count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT 
KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, 
KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), 
count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT 
KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, 
KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, 
KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, 
KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, 
KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, 
KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20
+                Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4
+PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) 
from abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a, count(distinct b), count(distinct c), 
sum(d) from abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
+                    outputColumnNames: a, b, c, d
+                    Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: a (type: int), b (type: int), c (type: 
int)
+                      sort order: +++
+                      Map-reduce partition columns: a (type: int)
+                      Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: d (type: int)
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), 
count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: complete
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from 
abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from 
abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     2       2       10
+100    1       1       3
+12     1       2       9
+NULL   1       1       6
+PREHOOK: query: explain select count(1), count(*), count(a), count(b), 
count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1), count(*), count(a), count(b), 
count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: abcd
+                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int), c (type: int), 
d (type: int)
+                    outputColumnNames: _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int)
+                      sort order: ++++
+                      Statistics: Num rows: 7 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(1), count(), count(KEY._col0:0._col0), 
count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), 
count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT 
KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, 
KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), 
count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT 
KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, 
KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, 
KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, 
KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, 
KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, 
KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3)
+                mode: complete
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20
+                Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_groupby4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby4.q.out 
b/ql/src/test/results/clientpositive/tez/vector_groupby4.q.out
new file mode 100644
index 0000000..401ab09
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby4.q.out
@@ -0,0 +1,137 @@
+PREHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcorc
+POSTHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcorc
+POSTHOOK: Lineage: srcorc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: srcorc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcorc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: rand() (type: double)
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: partial1
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 3 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: final
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.dest1
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.dest1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcorc
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcorc
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcorc)srcorc.FieldSchema(name:key, 
type:string, comment:null), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby6.q.out 
b/ql/src/test/results/clientpositive/tez/vector_groupby6.q.out
new file mode 100644
index 0000000..c9174e6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby6.q.out
@@ -0,0 +1,137 @@
+PREHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcorc
+POSTHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcorc
+POSTHOOK: Lineage: srcorc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: srcorc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcorc
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(value, 5, 1) (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: rand() (type: double)
+                      Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized
+        Reducer 2 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: partial1
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+        Reducer 3 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: final
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 44000 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 250 Data size: 44000 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                      name: default.dest1
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.dest1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcorc
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcorc
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcorc)srcorc.FieldSchema(name:value, 
type:string, comment:null), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out 
b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
index d69012e..2d5e782 100644
--- a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
@@ -27,8 +27,8 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_33]
-        Select Operator [SEL_32] (rows=302 width=10)
+      File Output Operator [FS_36]
+        Select Operator [SEL_35] (rows=302 width=10)
           Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE]
           SHUFFLE [RS_21]
@@ -48,9 +48,9 @@ Stage-0
                 <-Map Join Operator [MAPJOIN_27] (rows=550 width=10)
                     Conds:(Inner),Output:["_col0","_col1"]
                   <-Reducer 4 [BROADCAST_EDGE] vectorized
-                    BROADCAST [RS_14]
-                      Select Operator [SEL_10] (rows=1 width=8)
-                        Filter Operator [FIL_9] (rows=1 width=8)
+                    BROADCAST [RS_34]
+                      Select Operator [SEL_33] (rows=1 width=8)
+                        Filter Operator [FIL_32] (rows=1 width=8)
                           predicate:(_col0 = 0)
                           Group By Operator [GBY_31] (rows=1 width=8)
                             
Output:["_col0"],aggregations:["count(VALUE._col0)"]

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out 
b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
index 93137f1..9492f8a 100644
--- a/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out
@@ -113,7 +113,7 @@ Stage-0
       Reducer 2
       File Output Operator [FS_10]
         Merge Join Operator [MERGEJOIN_21] (rows=2 width=112)
-          
Conds:RS_23._col2=RS_27._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+          
Conds:RS_23._col2=RS_28._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
         <-Map 1 [SIMPLE_EDGE] vectorized
           SHUFFLE [RS_23]
             PartitionCols:_col2
@@ -121,16 +121,16 @@ Stage-0
               Output:["_col0","_col1","_col2"]
               TableScan [TS_0] (rows=2 width=102)
                 
default@char_tbl1,c1,Tbl:COMPLETE,Col:NONE,Output:["name","age"]
-          Dynamic Partitioning Event Operator [EVENT_20] (rows=2 width=102)
+          Dynamic Partitioning Event Operator [EVENT_26] (rows=2 width=102)
             Group By Operator [GBY_25] (rows=2 width=102)
               Output:["_col0"],keys:_col0
               Select Operator [SEL_24] (rows=2 width=102)
                 Output:["_col0"]
                  Please refer to the previous Select Operator [SEL_22]
         <-Map 3 [SIMPLE_EDGE] vectorized
-          SHUFFLE [RS_27]
+          SHUFFLE [RS_28]
             PartitionCols:_col2
-            Select Operator [SEL_26] (rows=2 width=101)
+            Select Operator [SEL_27] (rows=2 width=101)
               Output:["_col0","_col1","_col2"]
               TableScan [TS_3] (rows=2 width=101)
                 
default@char_tbl2,c2,Tbl:COMPLETE,Col:NONE,Output:["name","age"]

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out 
b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
index d5c01d0..b544784 100644
--- a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
@@ -360,6 +360,7 @@ STAGE PLANS:
                         Statistics: Num rows: 12288 Data size: 2641964 Basic 
stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
         Reducer 2 
+            Execution mode: vectorized
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out 
b/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out
index fb272dd..4a19861 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out
@@ -132,13 +132,13 @@ Stage-0
     limit:-1
     Stage-1
       Reducer 2 vectorized
-      File Output Operator [FS_6]
-        Select Operator [SEL_5] (rows=1 width=80)
+      File Output Operator [FS_12]
+        Select Operator [SEL_11] (rows=1 width=80)
           Output:["_col0","_col1","_col2"]
-          Group By Operator [GBY_9] (rows=1 width=80)
+          Group By Operator [GBY_10] (rows=1 width=80)
             
Output:["_col0","_col1"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"]
           <-Map 1 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_3]
+            SHUFFLE [RS_9]
               Group By Operator [GBY_8] (rows=1 width=80)
                 Output:["_col0","_col1"],aggregations:["min(ts)","max(ts)"]
                 Select Operator [SEL_7] (rows=2 width=40)

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/vector_count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_count.q.out 
b/ql/src/test/results/clientpositive/vector_count.q.out
new file mode 100644
index 0000000..734ef39
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_count.q.out
@@ -0,0 +1,286 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table abcd_txt (a int, b int, c int, d int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@abcd_txt
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table abcd_txt (a int, b int, c int, d int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@abcd_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE 
abcd_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@abcd_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE 
abcd_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@abcd_txt
+PREHOOK: query: create table abcd stored as orc as select * from abcd_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@abcd_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@abcd
+POSTHOOK: query: create table abcd stored as orc as select * from abcd_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@abcd_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@abcd
+POSTHOOK: Lineage: abcd.a SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:a, 
type:int, comment:null), ]
+POSTHOOK: Lineage: abcd.b SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:b, 
type:int, comment:null), ]
+POSTHOOK: Lineage: abcd.c SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:c, 
type:int, comment:null), ]
+POSTHOOK: Lineage: abcd.d SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:d, 
type:int, comment:null), ]
+PREHOOK: query: select * from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select * from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     100     45      4
+10     100     NULL    5
+10     1000    50      1
+100    100     10      3
+12     100     75      7
+12     NULL    80      2
+NULL   35      23      6
+PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) 
from abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a, count(distinct b), count(distinct c), 
sum(d) from abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), c (type: int), d 
(type: int)
+              outputColumnNames: a, b, c, d
+              Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
+                keys: a (type: int), b (type: int), c (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: int)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col5 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT 
KEY._col1:1._col0), sum(VALUE._col2)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from 
abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from 
abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     2       2       10
+100    1       1       3
+12     1       2       9
+NULL   1       1       6
+PREHOOK: query: explain select count(1), count(*), count(a), count(b), 
count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1), count(*), count(a), count(b), 
count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), c (type: int), d 
(type: int)
+              outputColumnNames: _col1, _col2, _col3, _col4
+              Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+              Group By Operator
+                aggregations: count(1), count(), count(_col1), count(_col2), 
count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), 
count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), 
count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT 
_col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), 
count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), 
count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), 
count(DISTINCT _col1, _col2, _col3, _col4)
+                keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), 
_col4 (type: int)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: int), _col3 (type: int)
+                  sort order: ++++
+                  Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col4 (type: bigint), _col5 (type: 
bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), 
_col9 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), count(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), 
count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT 
KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, 
KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), 
count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT 
KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, 
KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, 
KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, 
KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, 
KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, 
KEY._col0:14._col1, KEY._col0:14._col2, 
KEY._col0:14._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, 
_col17, _col18, _col19, _col20
+          Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4
+PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) 
from abcd group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a, count(distinct b), count(distinct c), 
sum(d) from abcd group by a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), c (type: int), d 
(type: int)
+              outputColumnNames: a, b, c, d
+              Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: a (type: int), b (type: int), c (type: int)
+                sort order: +++
+                Map-reduce partition columns: a (type: int)
+                Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+                value expressions: d (type: int)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT 
KEY._col1:1._col0), sum(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: complete
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column 
stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from 
abcd group by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from 
abcd group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+10     2       2       10
+100    1       1       3
+12     1       2       9
+NULL   1       1       6
+PREHOOK: query: explain select count(1), count(*), count(a), count(b), 
count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1), count(*), count(a), count(b), 
count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: abcd
+            Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: a (type: int), b (type: int), c (type: int), d 
(type: int)
+              outputColumnNames: _col1, _col2, _col3, _col4
+              Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int), _col2 (type: int), _col3 
(type: int), _col4 (type: int)
+                sort order: ++++
+                Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE 
Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(1), count(), count(KEY._col0:0._col0), 
count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), 
count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT 
KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, 
KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), 
count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT 
KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, 
KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, 
KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, 
KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, 
KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, 
KEY._col0:14._col1, KEY._col0:14._col2, 
KEY._col0:14._col3)
+          mode: complete
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, 
_col17, _col18, _col19, _col20
+          Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+PREHOOK: type: QUERY
+PREHOOK: Input: default@abcd
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), 
count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct 
c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), 
count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), 
count(distinct a,b,d), count(distinct a,b,c,d) from abcd
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@abcd
+#### A masked pattern was here ####
+7      7       6       6       6       7       3       3       6       7       
4       5       6       6       5       6       4       5       5       5       
4

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/vector_groupby4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby4.q.out 
b/ql/src/test/results/clientpositive/vector_groupby4.q.out
new file mode 100644
index 0000000..8041511
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_groupby4.q.out
@@ -0,0 +1,134 @@
+PREHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcorc
+POSTHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcorc
+POSTHOOK: Lineage: srcorc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: srcorc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: srcorc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: substr(key, 1, 1) (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: rand() (type: double)
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: partial1
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: final
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.dest1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcorc
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY 
substr(srcorc.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcorc
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcorc)srcorc.FieldSchema(name:key, 
type:string, comment:null), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9

http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/vector_groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby6.q.out 
b/ql/src/test/results/clientpositive/vector_groupby6.q.out
new file mode 100644
index 0000000..63fe8f3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_groupby6.q.out
@@ -0,0 +1,134 @@
+PREHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcorc
+POSTHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcorc
+POSTHOOK: Lineage: srcorc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: srcorc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(c1 STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+  Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: srcorc
+            Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: substr(value, 5, 1) (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: rand() (type: double)
+                Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: partial1
+          outputColumnNames: _col0
+          Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: final
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+                serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.dest1
+
+  Stage: Stage-3
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcorc
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcorc
+INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcorc
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcorc)srcorc.FieldSchema(name:value, 
type:string, comment:null), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9

[1/2] hive git commit: HIVE-13713: We miss vectorization in a case of count(*) when aggregation mode is COMPLETE (Matt McCline, reviewed by Sergey Shelukhin)

Reply via email to