http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
index 28edb6f..f137c63 100644
--- a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
@@ -58,9 +58,11 @@ STAGE PLANS:
             Group By Vectorization:
                 aggregators: VectorUDAFCount(col 5) -> bigint
                 className: VectorGroupByOperator
+                groupByMode: HASH
                 vectorOutput: true
                 keyExpressions: col 0
                 native: false
+                vectorProcessingMode: HASH
                 projectedOutputColumns: [0]
             keys: _col0 (type: string)
             mode: hash
@@ -101,9 +103,11 @@ STAGE PLANS:
             Group By Vectorization:
                 aggregators: VectorUDAFCountMerge(col 1) -> bigint
                 className: VectorGroupByOperator
+                groupByMode: MERGEPARTIAL
                 vectorOutput: true
                 keyExpressions: col 0
                 native: false
+                vectorProcessingMode: MERGE_PARTIAL
                 projectedOutputColumns: [0]
             keys: KEY._col0 (type: string)
             mode: mergepartial
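Note: the new groupByMode/vectorProcessingMode fields appear in matched pairs; the map-side VectorGroupByOperator aggregates in HASH mode while the reduce-side operator merges partials in MERGE_PARTIAL mode (GLOBAL when there are no grouping keys, as in the diffs below). As a minimal sketch, assuming an ORC table with illustrative names (this is not the actual vector_when_case_null.q source), a keyed grouped count of this shape yields a plan like the one above:

    SET hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION DETAIL
    SELECT str_col, COUNT(CASE WHEN flag_col = 1 THEN NULL ELSE val_col END)
    FROM sample_orc_table
    GROUP BY str_col;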
http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index b44e749..fba9c07 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -6,7 +6,7 @@ SELECT MIN(ctinyint) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -52,8 +52,10 @@ STAGE PLANS:
             Group By Vectorization:
                 aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
                 className: VectorGroupByOperator
+                groupByMode: HASH
                 vectorOutput: true
                 native: false
+                vectorProcessingMode: HASH
                 projectedOutputColumns: [0, 1, 2, 3]
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -62,8 +64,10 @@ STAGE PLANS:
               sort order:
               Reduce Sink Vectorization:
                   className: VectorReduceSinkEmptyKeyOperator
+                  keyColumns: []
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [0, 1, 2, 3]
               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
@@ -76,23 +80,36 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
+            rowBatchContext:
+                dataColumnCount: 12
+                includeColumns: [0]
+                dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                partitionColumnCount: 0
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder:
+               reduceColumnSortOrder:
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 4
+                   dataColumns: VALUE._col0:tinyint, VALUE._col1:tinyint, VALUE._col2:bigint, VALUE._col3:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
                Group By Vectorization:
                    aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 1) -> tinyint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
                    className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
                    vectorOutput: true
                    native: false
+                   vectorProcessingMode: GLOBAL
                    projectedOutputColumns: [0, 1, 2, 3]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3
@@ -102,8 +119,10 @@ STAGE PLANS:
               sort order: +
               Reduce Sink Vectorization:
                   className: VectorReduceSinkObjectHashOperator
+                  keyColumns: [0]
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [1, 2, 3]
               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
        Reducer 3
@@ -111,10 +130,16 @@ STAGE PLANS:
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: a
+               reduceColumnSortOrder: +
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 4
+                   dataColumns: KEY.reducesinkkey0:tinyint, VALUE._col0:tinyint, VALUE._col1:bigint, VALUE._col2:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
@@ -160,12 +185,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64	62	9173	12288
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1
@@ -208,8 +233,10 @@ STAGE PLANS:
             Group By Vectorization:
                 aggregators: VectorUDAFSumLong(col 0) -> bigint
                 className: VectorGroupByOperator
+                groupByMode: HASH
                 vectorOutput: true
                 native: false
+                vectorProcessingMode: HASH
                 projectedOutputColumns: [0]
             mode: hash
             outputColumnNames: _col0
@@ -218,8 +245,10 @@ STAGE PLANS:
               sort order:
               Reduce Sink Vectorization:
                   className: VectorReduceSinkEmptyKeyOperator
+                  keyColumns: []
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [0]
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
@@ -232,23 +261,36 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
+            rowBatchContext:
+                dataColumnCount: 12
+                includeColumns: [0]
+                dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                partitionColumnCount: 0
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder:
+               reduceColumnSortOrder:
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: VALUE._col0:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
                Group By Vectorization:
                    aggregators: VectorUDAFSumLong(col 0) -> bigint
                    className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
                    vectorOutput: true
                    native: false
+                   vectorProcessingMode: GLOBAL
                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
@@ -258,18 +300,26 @@ STAGE PLANS:
               sort order: +
               Reduce Sink Vectorization:
                   className: VectorReduceSinkObjectHashOperator
+                  keyColumns: [0]
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: []
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: a
+               reduceColumnSortOrder: +
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: bigint)
@@ -375,18 +425,20 @@ STAGE PLANS:
         Map Vectorization:
             enabled: true
             enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-            groupByVectorOutput: false
+            groupByVectorOutput: true
             inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:tinyint> of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -455,7 +507,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.344925324321378	1158.3003004768184	1158.3003004768184	1158.4265870337827	34.033811136527426	34.033811136527426	34.033811136527426	34.03566639620536
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -463,7 +515,7 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -509,8 +561,10 @@ STAGE PLANS:
             Group By Vectorization:
                 aggregators: VectorUDAFMinLong(col 3) -> bigint, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCountStar(*) -> bigint
                 className: VectorGroupByOperator
+                groupByMode: HASH
                 vectorOutput: true
                 native: false
+                vectorProcessingMode: HASH
                 projectedOutputColumns: [0, 1, 2, 3]
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -519,8 +573,10 @@ STAGE PLANS:
               sort order:
               Reduce Sink Vectorization:
                   className: VectorReduceSinkEmptyKeyOperator
+                  keyColumns: []
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [0, 1, 2, 3]
               Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
@@ -533,23 +589,36 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
+            rowBatchContext:
+                dataColumnCount: 12
+                includeColumns: [3]
+                dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                partitionColumnCount: 0
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder:
+               reduceColumnSortOrder:
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 4
+                   dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint, VALUE._col3:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
                Group By Vectorization:
                    aggregators: VectorUDAFMinLong(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
                    className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
                    vectorOutput: true
                    native: false
+                   vectorProcessingMode: GLOBAL
                    projectedOutputColumns: [0, 1, 2, 3]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3
@@ -559,8 +628,10 @@ STAGE PLANS:
               sort order: +
               Reduce Sink Vectorization:
                   className: VectorReduceSinkObjectHashOperator
+                  keyColumns: [0]
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [1, 2, 3]
               Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
        Reducer 3
@@ -568,10 +639,16 @@ STAGE PLANS:
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: a
+               reduceColumnSortOrder: +
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 4
+                   dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
@@ -617,12 +694,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -2147311592	2145498388	9173	12288
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
@@ -665,8 +742,10 @@ STAGE PLANS:
             Group By Vectorization:
                 aggregators: VectorUDAFSumLong(col 3) -> bigint
                 className: VectorGroupByOperator
+                groupByMode: HASH
                 vectorOutput: true
                 native: false
+                vectorProcessingMode: HASH
                 projectedOutputColumns: [0]
             mode: hash
             outputColumnNames: _col0
@@ -675,8 +754,10 @@ STAGE PLANS:
               sort order:
               Reduce Sink Vectorization:
                   className: VectorReduceSinkEmptyKeyOperator
+                  keyColumns: []
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [0]
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
@@ -689,23 +770,36 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
+            rowBatchContext:
+                dataColumnCount: 12
+                includeColumns: [3]
+                dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                partitionColumnCount: 0
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder:
+               reduceColumnSortOrder:
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: VALUE._col0:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
                Group By Vectorization:
                    aggregators: VectorUDAFSumLong(col 0) -> bigint
                    className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
                    vectorOutput: true
                    native: false
+                   vectorProcessingMode: GLOBAL
                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
@@ -715,18 +809,26 @@ STAGE PLANS:
               sort order: +
               Reduce Sink Vectorization:
                   className: VectorReduceSinkObjectHashOperator
+                  keyColumns: [0]
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: []
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: a
+               reduceColumnSortOrder: +
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: bigint)
@@ -832,18 +934,20 @@ STAGE PLANS:
         Map Vectorization:
             enabled: true
             enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-            groupByVectorOutput: false
+            groupByVectorOutput: true
            inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:bigint> of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -912,7 +1016,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1.8515862077935246E8	2.07689300543081907E18	2.07689300543081907E18	2.07711944383088768E18	1.441142951074188E9	1.441142951074188E9	1.441142951074188E9	1.4412215110214279E9
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -920,7 +1024,7 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -966,8 +1070,10 @@ STAGE PLANS:
             Group By Vectorization:
                 aggregators: VectorUDAFMinDouble(col 4) -> float, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, VectorUDAFCountStar(*) -> bigint
                 className: VectorGroupByOperator
+                groupByMode: HASH
                 vectorOutput: true
                 native: false
+                vectorProcessingMode: HASH
                 projectedOutputColumns: [0, 1, 2, 3]
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -976,8 +1082,10 @@ STAGE PLANS:
               sort order:
               Reduce Sink Vectorization:
                   className: VectorReduceSinkEmptyKeyOperator
+                  keyColumns: []
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [0, 1, 2, 3]
               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
@@ -990,23 +1098,36 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
+            rowBatchContext:
+                dataColumnCount: 12
+                includeColumns: [4]
+                dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                partitionColumnCount: 0
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder:
+               reduceColumnSortOrder:
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 4
+                   dataColumns: VALUE._col0:float, VALUE._col1:float, VALUE._col2:bigint, VALUE._col3:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
                Group By Vectorization:
                    aggregators: VectorUDAFMinDouble(col 0) -> float, VectorUDAFMaxDouble(col 1) -> float, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
                    className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
                    vectorOutput: true
                    native: false
+                   vectorProcessingMode: GLOBAL
                    projectedOutputColumns: [0, 1, 2, 3]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3
@@ -1016,8 +1137,10 @@ STAGE PLANS:
               sort order: +
               Reduce Sink Vectorization:
                   className: VectorReduceSinkObjectHashOperator
+                  keyColumns: [0]
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [1, 2, 3]
               Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
        Reducer 3
@@ -1025,10 +1148,16 @@ STAGE PLANS:
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: a
+               reduceColumnSortOrder: +
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 4
+                   dataColumns: KEY.reducesinkkey0:float, VALUE._col0:float, VALUE._col1:bigint, VALUE._col2:bigint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
@@ -1074,12 +1203,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64.0	79.553	9173	12288
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1
@@ -1122,8 +1251,10 @@ STAGE PLANS:
             Group By Vectorization:
                 aggregators: VectorUDAFSumDouble(col 4) -> double
                 className: VectorGroupByOperator
+                groupByMode: HASH
                 vectorOutput: true
                 native: false
+                vectorProcessingMode: HASH
                 projectedOutputColumns: [0]
             mode: hash
             outputColumnNames: _col0
@@ -1132,8 +1263,10 @@ STAGE PLANS:
               sort order:
               Reduce Sink Vectorization:
                   className: VectorReduceSinkEmptyKeyOperator
+                  keyColumns: []
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: [0]
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col0 (type: double)
             Execution mode: vectorized, llap
@@ -1146,23 +1279,36 @@ STAGE PLANS:
             allNative: false
             usesVectorUDFAdaptor: false
             vectorized: true
+            rowBatchContext:
+                dataColumnCount: 12
+                includeColumns: [4]
+                dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                partitionColumnCount: 0
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder:
+               reduceColumnSortOrder:
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: VALUE._col0:double
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
                Group By Vectorization:
                    aggregators: VectorUDAFSumDouble(col 0) -> double
                    className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
                    vectorOutput: true
                    native: false
+                   vectorProcessingMode: GLOBAL
                    projectedOutputColumns: [0]
                mode: mergepartial
                outputColumnNames: _col0
@@ -1172,18 +1318,26 @@ STAGE PLANS:
               sort order: +
               Reduce Sink Vectorization:
                   className: VectorReduceSinkObjectHashOperator
+                  keyColumns: [0]
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  valueColumns: []
               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: a
+               reduceColumnSortOrder: +
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 1
+                   dataColumns: KEY.reducesinkkey0:double
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: double)
@@ -1289,18 +1443,20 @@ STAGE PLANS:
         Map Vectorization:
             enabled: true
             enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-            groupByVectorOutput: false
+            groupByVectorOutput: true
            inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:float> of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -1370,7 +1526,7 @@ POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.303895780321011	1163.8972588604984	1163.8972588604984	1164.0241556397025	34.115938487171924	34.115938487171924	34.115938487171924	34.11779822379666
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -1397,7 +1553,7 @@ WHERE (((cstring2 LIKE '%b%')
        AND ((cboolean2 = 1)
        AND (3569 = ctinyint))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -1466,17 +1622,24 @@ STAGE PLANS:
             Group By Operator
               aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
               Group By Vectorization:
-                  aggregators: VectorUDAFAvgLong(col 3) -> struct<count:bigint,sum:double>, VectorUDAFStdPopLong(col 3) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFVarSampLong(col 3) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint
+                  aggregators: VectorUDAFAvgLong(col 3) -> struct<count:bigint,sum:double,input:bigint>, VectorUDAFStdPopLong(col 3) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFVarSampLong(col 3) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint
                   className: VectorGroupByOperator
-                  vectorOutput: false
+                  groupByMode: HASH
+                  vectorOutput: true
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: [0, 1, 2, 3, 4, 5]
-                  vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 3) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
               mode: hash
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
               Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 sort order:
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkEmptyKeyOperator
+                    keyColumns: []
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    valueColumns: [0, 1, 2, 3, 4, 5]
                 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint)
             Execution mode: vectorized, llap
@@ -1484,30 +1647,60 @@ STAGE PLANS:
         Map Vectorization:
             enabled: true
             enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-            groupByVectorOutput: false
+            groupByVectorOutput: true
            inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
            allNative: false
            usesVectorUDFAdaptor: false
            vectorized: true
+            rowBatchContext:
+                dataColumnCount: 12
+                includeColumns: [0, 1, 2, 3, 4, 5, 7, 11]
+                dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                partitionColumnCount: 0
+                scratchColumnTypeNames: decimal(13,3), double
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:bigint> of Column[VALUE._col0] not supported
-               vectorized: false
+               reduceColumnNullOrder:
+               reduceColumnSortOrder:
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 6
+                   dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:bigint>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
+               Group By Vectorization:
+                   aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFVarSampFinal(col 2) -> double, VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 5) -> tinyint
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   native: false
+                   vectorProcessingMode: GLOBAL
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19]
+                     selectExpressions: DoubleColUnaryMinus(col 0) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 7:double, DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10, col 8)(children: DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12)(children: DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 13:double, DoubleColDivideDoubleColumn(col 14, col 15)(children: DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2, col 1) -> 14:double, DoubleColUnaryMinus(col 2) -> 15:double, DoubleColMultiplyDoubleColumn(col 17, col 16)(children: DoubleColUnaryMinus(col 16)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0) -> 16:double) -> 18:double, LongColUnaryMinus(col 5) -> 19:long
                 Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
index e0a4344..4699c2e 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
@@ -1,3 +1,178 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT VAR_POP(ctinyint),
+       (VAR_POP(ctinyint) / -26.28),
+       SUM(cfloat),
+       (-1.389 + SUM(cfloat)),
+       (SUM(cfloat) * (-1.389 + SUM(cfloat))),
+       MAX(ctinyint),
+       (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))),
+       MAX(cint),
+       (MAX(cint) * 79.553),
+       VAR_SAMP(cdouble),
+       (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))),
+       COUNT(cint),
+       (-563 % MAX(cint))
+FROM alltypesorc
+WHERE (((cdouble > ctinyint)
+        AND (cboolean2 > 0))
+       OR ((cbigint < ctinyint)
+           OR ((cint > cbigint)
+               OR (cboolean1 < 0))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT VAR_POP(ctinyint),
+       (VAR_POP(ctinyint) / -26.28),
+       SUM(cfloat),
+       (-1.389 + SUM(cfloat)),
+       (SUM(cfloat) * (-1.389 + SUM(cfloat))),
+       MAX(ctinyint),
+       (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))),
+       MAX(cint),
+       (MAX(cint) * 79.553),
+       VAR_SAMP(cdouble),
+       (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))),
+       COUNT(cint),
+       (-563 % MAX(cint))
+FROM alltypesorc
+WHERE (((cdouble > ctinyint)
+        AND (cboolean2 > 0))
+       OR ((cbigint < ctinyint)
+           OR ((cint > cbigint)
+               OR (cboolean1 < 0))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 330276 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterLongColGreaterLongScalar(col 11, val 0) -> boolean) -> boolean, FilterLongColLessLongColumn(col 3, col 0)(children: col 0) -> boolean, FilterLongColGreaterLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean) -> boolean
+                    predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or (cboolean1 < 0)) (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 330276 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double)
+                      outputColumnNames: ctinyint, cint, cfloat, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 2, 4, 5]
+                      Statistics: Num rows: 12288 Data size: 330276 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFVarPopLong(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarSampDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCount(col 2) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order:
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4, 5]
+                          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: struct<count:bigint,sum:double,variance:double>), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 2, 3, 4, 5, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder:
+                reduceColumnSortOrder:
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5)
+                Group By Vectorization:
+                    aggregators: VectorUDAFVarPopFinal(col 0) -> double, VectorUDAFSumDouble(col 1) -> double, VectorUDAFMaxLong(col 2) -> tinyint, VectorUDAFMaxLong(col 3) -> int, VectorUDAFVarSampFinal(col 4) -> double, VectorUDAFCountMerge(col 5) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 6, 1, 7, 9, 2, 8, 3, 12, 4, 13, 5, 14]
+                      selectExpressions: DoubleColDivideDoubleScalar(col 0, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 7:double, DoubleColMultiplyDoubleColumn(col 1, col 8)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 8:double) -> 9:double, DoubleColUnaryMinus(col 10)(children: DoubleColMultiplyDoubleColumn(col 1, col 8)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 8:double) -> 10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11, val 79.553)(children: CastLongToDecimal(col 3) -> 11:decimal(10,0)) -> 12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 10)(children: DoubleColUnaryMinus(col 13)(children: DoubleColMultiplyDoubleColumn(col 1, col 10)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 10:double) -> 13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(val -563, col 3) -> 14:long
+                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT VAR_POP(ctinyint),
        (VAR_POP(ctinyint) / -26.28),
        SUM(cfloat),
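Note: the vectorization_0 and vectorization_1 reducers above flipped from "notVectorizedReason: ... not supported / vectorized: false" to fully vectorized plans using the Final aggregators (VectorUDAFAvgFinal, VectorUDAFStdPopFinal, VectorUDAFVarPopFinal, VectorUDAFVarSampFinal) once the struct-typed partials became acceptable GROUP BY inputs. The enable and native conditions cited throughout these plans correspond to session settings; a minimal sketch of a configuration that satisfies them (values mirror what the conditions assert, not necessarily the full test harness config):

    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.reduce.enabled=true;
    SET hive.vectorized.execution.reducesink.new.enabled=true;
    SET hive.execution.engine=tez;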
http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/llap/vectorization_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out
index 9dad4c4..f06c2db 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out
@@ -1,3 +1,121 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cdouble,
+       ctimestamp1,
+       ctinyint,
+       cboolean1,
+       cstring1,
+       (-(cdouble)),
+       (cdouble + csmallint),
+       ((cdouble + csmallint) % 33),
+       (-(cdouble)),
+       (ctinyint % cdouble),
+       (ctinyint % csmallint),
+       (-(cdouble)),
+       (cbigint * (ctinyint % csmallint)),
+       (9763215.5639 - (cdouble + csmallint)),
+       (-((-(cdouble))))
+FROM alltypesorc
+WHERE (((cstring2 <= '10')
+        OR ((ctinyint > cdouble)
+            AND (-5638.15 >= ctinyint)))
+       OR ((cdouble > 6981)
+           AND ((csmallint = 9763215.5639)
+                OR (cstring1 LIKE '%a'))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cdouble,
+       ctimestamp1,
+       ctinyint,
+       cboolean1,
+       cstring1,
+       (-(cdouble)),
+       (cdouble + csmallint),
+       ((cdouble + csmallint) % 33),
+       (-(cdouble)),
+       (ctinyint % cdouble),
+       (ctinyint % csmallint),
+       (-(cdouble)),
+       (cbigint * (ctinyint % csmallint)),
+       (9763215.5639 - (cdouble + csmallint)),
+       (-((-(cdouble))))
+FROM alltypesorc
+WHERE (((cstring2 <= '10')
+        OR ((ctinyint > cdouble)
+            AND (-5638.15 >= ctinyint)))
+       OR ((cdouble > 6981)
+           AND ((csmallint = 9763215.5639)
+                OR (cstring1 LIKE '%a'))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean)
+                    Statistics: Num rows: 5461 Data size: 1107444 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25]
+                          selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double
+                      Statistics: Num rows: 5461 Data size: 1082056 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 5461 Data size: 1082056 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 3, 5, 6, 7, 8, 10]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double, decimal(6,2), decimal(11,4), double, double, double, double, double, bigint, bigint, bigint, double, double, double
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cdouble,
        ctimestamp1,
        ctinyint,

http://git-wip-us.apache.org/repos/asf/hive/blob/92fbe256/ql/src/test/results/clientpositive/llap/vectorization_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_11.q.out b/ql/src/test/results/clientpositive/llap/vectorization_11.q.out
index dff58da..2b8c391 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_11.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_11.q.out
@@ -1,3 +1,103 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cstring1,
+       cboolean1,
+       cdouble,
+       ctimestamp1,
+       (-3728 * csmallint),
+       (cdouble - 9763215.5639),
+       (-(cdouble)),
+       ((-(cdouble)) + 6981),
+       (cdouble * -5638.15)
+FROM alltypesorc
+WHERE ((cstring2 = cstring1)
+       OR ((ctimestamp1 IS NULL)
+           AND (cstring1 LIKE '%a')))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cstring1,
+       cboolean1,
+       cdouble,
+       ctimestamp1,
+       (-3728 * csmallint),
+       (cdouble - 9763215.5639),
+       (-(cdouble)),
+       ((-(cdouble)) + 6981),
+       (cdouble * -5638.15)
+FROM alltypesorc
+WHERE ((cstring2 = cstring1)
+       OR ((ctimestamp1 IS NULL)
+           AND (cstring1 LIKE '%a')))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2381474 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7, col 6) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean
+                    predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean)
+                    Statistics: Num rows: 6144 Data size: 1190792 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0) (type: double), (cdouble * -5638.15) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [6, 10, 5, 8, 12, 13, 14, 16, 15]
+                          selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1)(children: col 1) -> 12:long, DoubleColSubtractDoubleScalar(col 5, val 9763215.5639) -> 13:double, DoubleColUnaryMinus(col 5) -> 14:double, DoubleColAddDoubleScalar(col 15, val 6981.0)(children: DoubleColUnaryMinus(col 5) -> 15:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5, val -5638.15) -> 15:double
+                      Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [1, 5, 6, 7, 8, 10]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, double, double, double, double
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cstring1,
        cboolean1,
        cdouble,
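Note: the recurring EXPRESSION-to-DETAIL change in these .q.out files reflects the tests switching to the most detailed of Hive's vectorization explain levels; DETAIL is what adds the per-vertex rowBatchContext (dataColumns, includeColumns, scratch column types) and the keyColumns/valueColumns and reduceColumnSortOrder/reduceColumnNullOrder output seen throughout the diffs. The statement form, using a query taken verbatim from vectorization_0.q.out:

    EXPLAIN VECTORIZATION DETAIL
    SELECT SUM(cbigint) as c1
    FROM alltypesorc
    ORDER BY c1;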