Repository: hive Updated Branches: refs/heads/master d74d5637f -> 8136a10c1
http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index 451947b..f5b5d9d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -50,6 +50,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -84,6 +86,7 @@ public class TestVectorGroupByOperator { private static AggregationDesc buildAggregationDesc( VectorizationContext ctx, String aggregate, + GenericUDAFEvaluator.Mode mode, String column, TypeInfo typeInfo) { @@ -94,6 +97,7 @@ public class TestVectorGroupByOperator { AggregationDesc agg = new AggregationDesc(); agg.setGenericUDAFName(aggregate); + agg.setMode(mode); agg.setParameters(params); return agg; @@ -102,6 +106,7 @@ public class TestVectorGroupByOperator { VectorizationContext ctx) { AggregationDesc agg = new AggregationDesc(); agg.setGenericUDAFName("COUNT"); + agg.setMode(GenericUDAFEvaluator.Mode.PARTIAL1); agg.setParameters(new ArrayList<ExprNodeDesc>()); return agg; } @@ -110,10 +115,11 @@ public class TestVectorGroupByOperator { private static GroupByDesc buildGroupByDescType( VectorizationContext ctx, String 
aggregate, + GenericUDAFEvaluator.Mode mode, String column, TypeInfo dataType) { - AggregationDesc agg = buildAggregationDesc(ctx, aggregate, + AggregationDesc agg = buildAggregationDesc(ctx, aggregate, mode, column, dataType); ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>(); aggs.add(agg); @@ -124,6 +130,7 @@ public class TestVectorGroupByOperator { GroupByDesc desc = new GroupByDesc(); desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); + desc.getVectorDesc().setProcessingMode(ProcessingMode.GLOBAL); return desc; } @@ -154,7 +161,8 @@ public class TestVectorGroupByOperator { String key, TypeInfo keyTypeInfo) { - GroupByDesc desc = buildGroupByDescType(ctx, aggregate, column, dataTypeInfo); + GroupByDesc desc = buildGroupByDescType(ctx, aggregate, GenericUDAFEvaluator.Mode.PARTIAL1, column, dataTypeInfo); + desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); ExprNodeDesc keyExp = buildColumnDesc(ctx, key, keyTypeInfo); ArrayList<ExprNodeDesc> keys = new ArrayList<ExprNodeDesc>(); @@ -1716,7 +1724,7 @@ public class TestVectorGroupByOperator { ArrayList<AggregationDesc> aggs = new ArrayList(1); aggs.add( - buildAggregationDesc(ctx, aggregateName, + buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i]))); for(i=0; i<columnTypes.length - 1; ++i) { @@ -1730,6 +1738,7 @@ public class TestVectorGroupByOperator { desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); desc.setKeys(keysDesc); + desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); CompilationOpContext cCtx = new CompilationOpContext(); VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); @@ -1827,7 +1836,7 @@ public class TestVectorGroupByOperator { VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); Set<Object> keys = new HashSet<Object>(); - AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, + 
AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "Value", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[1])); ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>(); aggs.add(agg); @@ -1839,6 +1848,7 @@ public class TestVectorGroupByOperator { GroupByDesc desc = new GroupByDesc(); desc.setOutputColumnNames(outputColumnNames); desc.setAggregators(aggs); + desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0])); @@ -2242,6 +2252,7 @@ public class TestVectorGroupByOperator { VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = buildGroupByDescCountStar (ctx); + desc.getVectorDesc().setProcessingMode(ProcessingMode.HASH); CompilationOpContext cCtx = new CompilationOpContext(); VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); @@ -2271,9 +2282,9 @@ public class TestVectorGroupByOperator { mapColumnNames.add("A"); VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); - GroupByDesc desc = buildGroupByDescType(ctx, "count", "A", TypeInfoFactory.longTypeInfo); + GroupByDesc desc = buildGroupByDescType(ctx, "count", GenericUDAFEvaluator.Mode.FINAL, "A", TypeInfoFactory.longTypeInfo); VectorGroupByDesc vectorDesc = desc.getVectorDesc(); - vectorDesc.setIsReduceMergePartial(true); + vectorDesc.setProcessingMode(ProcessingMode.GLOBAL); // Use GLOBAL when no key for Reduce. 
CompilationOpContext cCtx = new CompilationOpContext(); VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); @@ -2303,7 +2314,7 @@ public class TestVectorGroupByOperator { mapColumnNames.add("A"); VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); - GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", + GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.stringTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); @@ -2336,7 +2347,7 @@ public class TestVectorGroupByOperator { VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); GroupByDesc desc = - buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4)); + buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4)); CompilationOpContext cCtx = new CompilationOpContext(); VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, ctx, desc); @@ -2368,7 +2379,7 @@ public class TestVectorGroupByOperator { mapColumnNames.add("A"); VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); - GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, "A", + GroupByDesc desc = buildGroupByDescType (ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.doubleTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); @@ -2400,7 +2411,7 @@ public class TestVectorGroupByOperator { mapColumnNames.add("A"); VectorizationContext ctx = new VectorizationContext("name", mapColumnNames); - GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, "A", TypeInfoFactory.longTypeInfo); + GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo); CompilationOpContext cCtx = new CompilationOpContext(); VectorGroupByOperator vgo = new VectorGroupByOperator(cCtx, 
ctx, desc); http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index 9d4ca76..3295372 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUD import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncAbsLongToLong; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.*; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; import org.apache.hadoop.hive.ql.udf.generic.*; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -107,6 +108,7 @@ public class TestVectorizer { GroupByOperator gbyOp = new GroupByOperator(new CompilationOpContext()); gbyOp.setConf(desc); + desc.setMode(GroupByDesc.Mode.HASH); Vectorizer v = new Vectorizer(); Assert.assertTrue(v.validateMapWorkOperator(gbyOp, null, false)); @@ -148,9 +150,9 @@ public class TestVectorizer { Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.FILTER)); Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION)); } - + /** - * prepareAbstractMapJoin prepares a join operator descriptor, used as helper by SMB and Map join tests. + * prepareAbstractMapJoin prepares a join operator descriptor, used as helper by SMB and Map join tests. */ private void prepareAbstractMapJoin(AbstractMapJoinOperator<? 
extends MapJoinDesc> map, MapJoinDesc mjdesc) { mjdesc.setPosBigTable(0); @@ -189,15 +191,15 @@ public class TestVectorizer { public void testValidateMapJoinOperator() { MapJoinOperator map = new MapJoinOperator(new CompilationOpContext()); MapJoinDesc mjdesc = new MapJoinDesc(); - + prepareAbstractMapJoin(map, mjdesc); map.setConf(mjdesc); - + Vectorizer vectorizer = new Vectorizer(); Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } - + /** * testValidateSMBJoinOperator validates that the SMB join operator can be vectorized. */ @@ -205,11 +207,11 @@ public class TestVectorizer { public void testValidateSMBJoinOperator() { SMBMapJoinOperator map = new SMBMapJoinOperator(new CompilationOpContext()); SMBJoinDesc mjdesc = new SMBJoinDesc(); - + prepareAbstractMapJoin(map, mjdesc); map.setConf(mjdesc); - + Vectorizer vectorizer = new Vectorizer(); - Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); + Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); } } http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/queries/clientpositive/vector_count.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_count.q b/ql/src/test/queries/clientpositive/vector_count.q new file mode 100644 index 0000000..341db74 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_count.q @@ -0,0 +1,26 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +-- SORT_QUERY_RESULTS + +create table abcd_txt (a int, b int, c int, d int); +LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt; + +create table abcd stored as orc as select * from abcd_txt; + +select * from abcd; +set hive.map.aggr=true; +explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; +select a, count(distinct b), count(distinct 
c), sum(d) from abcd group by a; + +explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; +select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; + +set hive.map.aggr=false; +explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; +select a, count(distinct b), count(distinct c), sum(d) from abcd group by a; + +explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; +select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd; http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/queries/clientpositive/vector_groupby4.q 
---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_groupby4.q b/ql/src/test/queries/clientpositive/vector_groupby4.q new file mode 100644 index 0000000..a59d1a8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_groupby4.q @@ -0,0 +1,23 @@ +set hive.explain.user=false; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.mapred.mode=nonstrict; +set hive.map.aggr=false; +set hive.groupby.skewindata=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src; + +-- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC; + +EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1); + +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1); + +SELECT dest1.* FROM dest1; + http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/queries/clientpositive/vector_groupby6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_groupby6.q b/ql/src/test/queries/clientpositive/vector_groupby6.q new file mode 100644 index 0000000..89c7a19 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_groupby6.q @@ -0,0 +1,24 @@ +set hive.explain.user=false; +set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.mapred.mode=nonstrict; +set hive.map.aggr=false; +set hive.groupby.skewindata=true; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src; + +-- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC; + +EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1); + +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT 
substr(srcorc.value,5,1); + +SELECT dest1.* FROM dest1; + + http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out b/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out index ab627b5..3bbbb46 100644 --- a/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_aggregate_without_gby.q.out @@ -47,11 +47,11 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_7] - Group By Operator [GBY_12] (rows=1 width=88) + File Output Operator [FS_14] + Group By Operator [GBY_13] (rows=1 width=88) Output:["_col0","_col1"],aggregations:["max(VALUE._col0)","max(VALUE._col1)"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_4] + SHUFFLE [RS_12] Group By Operator [GBY_11] (rows=1 width=88) Output:["_col0","_col1"],aggregations:["max(dt)","max(greg_dt)"] Select Operator [SEL_10] (rows=3 width=102) http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out index 67ddd9e..d37dc51 100644 --- a/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_auto_smb_mapjoin_14.q.out @@ -64,7 +64,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_14] + File Output Operator [FS_22] Group By Operator [GBY_21] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] @@ -134,11 +134,11 @@ Stage-0 limit:-1 Stage-1 Reducer 3 vectorized - 
File Output Operator [FS_19] - Group By Operator [GBY_29] (rows=1 width=8) + File Output Operator [FS_31] + Group By Operator [GBY_30] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 2 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_16] + SHUFFLE [RS_29] Group By Operator [GBY_28] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Select Operator [SEL_27] (rows=5 width=93) @@ -365,7 +365,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_14] + File Output Operator [FS_22] Group By Operator [GBY_21] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] @@ -444,7 +444,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_14] + File Output Operator [FS_22] Group By Operator [GBY_21] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] @@ -547,7 +547,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_14] + File Output Operator [FS_22] Group By Operator [GBY_21] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] @@ -640,7 +640,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_14] + File Output Operator [FS_22] Group By Operator [GBY_21] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] @@ -710,7 +710,7 @@ Stage-0 limit:-1 Stage-1 Reducer 3 vectorized - File Output Operator [FS_14] + File Output Operator [FS_29] Group By Operator [GBY_28] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 2 [SIMPLE_EDGE] @@ -781,7 +781,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_14] + File Output Operator [FS_22] Group By Operator [GBY_21] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] @@ -852,7 +852,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_18] + File 
Output Operator [FS_32] Group By Operator [GBY_31] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] @@ -945,7 +945,7 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_14] + File Output Operator [FS_22] Group By Operator [GBY_21] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 1 [SIMPLE_EDGE] http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_count.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_count.q.out b/ql/src/test/results/clientpositive/tez/vector_count.q.out new file mode 100644 index 0000000..c854515 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_count.q.out @@ -0,0 +1,314 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table abcd_txt (a int, b int, c int, d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@abcd_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table abcd_txt (a int, b int, c int, d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@abcd_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@abcd_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@abcd_txt +PREHOOK: query: create table abcd stored as orc as select * from abcd_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@abcd_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@abcd +POSTHOOK: query: create table abcd stored as orc as select * from abcd_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@abcd_txt +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@abcd +POSTHOOK: Lineage: abcd.a SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.b SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.c SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:c, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.d SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:d, type:int, comment:null), ] +PREHOOK: query: select * from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select * from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 100 45 4 +10 100 NULL 5 +10 1000 50 1 +100 100 10 3 +12 100 75 7 +12 NULL 80 2 +NULL 35 23 6 +PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) + keys: a (type: int), b (type: int), c (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: 
int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 2 2 10 +100 1 1 3 +12 1 2 9 +NULL 1 1 6 +PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), 
count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4) + keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort 
order: ++++ + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY. 
_col0:14._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: 
QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int), c (type: int) + sort order: +++ + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern 
was here #### +10 2 2 10 +100 1 1 3 +12 1 2 9 +NULL 1 1 6 +PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: ++++ + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), 
count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +PREHOOK: 
Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_groupby4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby4.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby4.q.out new file mode 100644 index 0000000..401ab09 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_groupby4.q.out @@ -0,0 +1,137 @@ +PREHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@srcorc +POSTHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcorc +POSTHOOK: Lineage: srcorc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcorc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcorc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(key, 1, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcorc +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcorc +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcorc)srcorc.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_groupby6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby6.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby6.q.out new file mode 100644 index 0000000..c9174e6 --- /dev/null +++ 
b/ql/src/test/results/clientpositive/tez/vector_groupby6.q.out @@ -0,0 +1,137 @@ +PREHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@srcorc +POSTHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcorc +POSTHOOK: Lineage: srcorc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcorc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcorc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcorc +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@srcorc +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcorc)srcorc.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out index d69012e..2d5e782 100644 --- a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out @@ -27,8 +27,8 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_33] - Select Operator [SEL_32] (rows=302 width=10) + File Output Operator [FS_36] + Select Operator [SEL_35] (rows=302 width=10) Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_21] @@ -48,9 +48,9 @@ Stage-0 <-Map Join Operator [MAPJOIN_27] (rows=550 width=10) Conds:(Inner),Output:["_col0","_col1"] <-Reducer 4 [BROADCAST_EDGE] vectorized - BROADCAST [RS_14] - Select Operator [SEL_10] (rows=1 width=8) - Filter Operator [FIL_9] (rows=1 width=8) + BROADCAST [RS_34] + Select Operator [SEL_33] (rows=1 width=8) + Filter Operator [FIL_32] (rows=1 width=8) predicate:(_col0 = 0) Group By Operator [GBY_31] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out ---------------------------------------------------------------------- diff --git 
a/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out index 93137f1..9492f8a 100644 --- a/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out @@ -113,7 +113,7 @@ Stage-0 Reducer 2 File Output Operator [FS_10] Merge Join Operator [MERGEJOIN_21] (rows=2 width=112) - Conds:RS_23._col2=RS_27._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Conds:RS_23._col2=RS_28._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_23] PartitionCols:_col2 @@ -121,16 +121,16 @@ Stage-0 Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=2 width=102) default@char_tbl1,c1,Tbl:COMPLETE,Col:NONE,Output:["name","age"] - Dynamic Partitioning Event Operator [EVENT_20] (rows=2 width=102) + Dynamic Partitioning Event Operator [EVENT_26] (rows=2 width=102) Group By Operator [GBY_25] (rows=2 width=102) Output:["_col0"],keys:_col0 Select Operator [SEL_24] (rows=2 width=102) Output:["_col0"] Please refer to the previous Select Operator [SEL_22] <-Map 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_27] + SHUFFLE [RS_28] PartitionCols:_col2 - Select Operator [SEL_26] (rows=2 width=101) + Select Operator [SEL_27] (rows=2 width=101) Output:["_col0","_col1","_col2"] TableScan [TS_3] (rows=2 width=101) default@char_tbl2,c2,Tbl:COMPLETE,Col:NONE,Output:["name","age"] http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out index d5c01d0..b544784 100644 --- a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ 
b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out @@ -360,6 +360,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 + Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint), KEY._col1 (type: double) http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out index fb272dd..4a19861 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_timestamp.q.out @@ -132,13 +132,13 @@ Stage-0 limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_6] - Select Operator [SEL_5] (rows=1 width=80) + File Output Operator [FS_12] + Select Operator [SEL_11] (rows=1 width=80) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_9] (rows=1 width=80) + Group By Operator [GBY_10] (rows=1 width=80) Output:["_col0","_col1"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_3] + SHUFFLE [RS_9] Group By Operator [GBY_8] (rows=1 width=80) Output:["_col0","_col1"],aggregations:["min(ts)","max(ts)"] Select Operator [SEL_7] (rows=2 width=40) http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/vector_count.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_count.q.out b/ql/src/test/results/clientpositive/vector_count.q.out new file mode 100644 index 0000000..734ef39 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_count.q.out @@ -0,0 +1,286 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table 
abcd_txt (a int, b int, c int, d int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@abcd_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table abcd_txt (a int, b int, c int, d int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@abcd_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@abcd_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in4.txt' INTO TABLE abcd_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@abcd_txt +PREHOOK: query: create table abcd stored as orc as select * from abcd_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@abcd_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@abcd +POSTHOOK: query: create table abcd stored as orc as select * from abcd_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@abcd_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@abcd +POSTHOOK: Lineage: abcd.a SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:a, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.b SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.c SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:c, type:int, comment:null), ] +POSTHOOK: Lineage: abcd.d SIMPLE [(abcd_txt)abcd_txt.FieldSchema(name:d, type:int, comment:null), ] +PREHOOK: query: select * from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select * from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 100 45 4 +10 100 NULL 5 +10 1000 50 1 +100 100 10 3 +12 100 75 7 +12 NULL 80 2 +NULL 35 23 6 +PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: 
QUERY +POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT b), count(DISTINCT c), sum(d) + keys: a (type: int), b (type: int), c (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(distinct b), 
count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +10 2 2 10 +100 1 1 3 +12 1 2 9 +NULL 1 1 6 +PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), 
count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4) + keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: ++++ + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT 
KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 +PREHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a 
+PREHOOK: type: QUERY +POSTHOOK: query: explain select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: a, b, c, d + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a (type: int), b (type: int), c (type: int) + sort order: +++ + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: d (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select a, count(distinct b), count(distinct c), sum(d) from abcd group by a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern 
was here #### +10 2 2 10 +100 1 1 3 +12 1 2 9 +NULL 1 1 6 +PREHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +POSTHOOK: query: explain select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: abcd + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: int), c (type: int), d (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + sort order: ++++ + Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), 
count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d), count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +PREHOOK: type: QUERY +PREHOOK: Input: default@abcd +#### A masked pattern was here #### +POSTHOOK: query: select count(1), count(*), count(a), count(b), count(c), count(d),
count(distinct a), count(distinct b), count(distinct c), count(distinct d), count(distinct a,b), count(distinct b,c), count(distinct c,d), count(distinct a,d), count(distinct a,c), count(distinct b,d), count(distinct a,b,c), count(distinct b,c,d), count(distinct a,c,d), count(distinct a,b,d), count(distinct a,b,c,d) from abcd +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abcd +#### A masked pattern was here #### +7 7 6 6 6 7 3 3 6 7 4 5 6 6 5 6 4 5 5 5 4 http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/vector_groupby4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_groupby4.q.out b/ql/src/test/results/clientpositive/vector_groupby4.q.out new file mode 100644 index 0000000..8041511 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -0,0 +1,134 @@ +PREHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@srcorc +POSTHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcorc +POSTHOOK: Lineage: srcorc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcorc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT 
substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcorc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(key, 1, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcorc +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcorc +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcorc)srcorc.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 http://git-wip-us.apache.org/repos/asf/hive/blob/8136a10c/ql/src/test/results/clientpositive/vector_groupby6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_groupby6.q.out b/ql/src/test/results/clientpositive/vector_groupby6.q.out new file mode 100644 index 0000000..63fe8f3 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -0,0 +1,134 @@ +PREHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src +PREHOOK: type: 
CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@srcorc +POSTHOOK: query: CREATE TABLE srcorc STORED AS ORC AS SELECT * FROM src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcorc +POSTHOOK: Lineage: srcorc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcorc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcorc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: 
KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcorc +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcorc +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcorc +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION 
[(srcorc)srcorc.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9
