http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 7de04a7..615c31a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -95,14 +95,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type: POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -121,12 +125,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 10) -> boolean predicate: bin is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -134,6 +149,10 @@ STAGE PLANS: keys: 0 _col10 (type: binary) 1 _col10 (type: binary) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 input vertices: 1 Map 3 @@ -141,48 +160,111 @@ STAGE PLANS: Select Operator expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [21] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 21:int Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 21) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Map 3 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 10) -> boolean predicate: bin is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col10 (type: binary) sort order: + Map-reduce partition columns: _col10 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -206,16 +288,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@hundredorc #### A masked pattern was here #### -27832781952 -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP BY bin PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT count(*), bin FROM hundredorc GROUP BY bin POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -233,12 +319,26 @@ STAGE PLANS: TableScan alias: hundredorc Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: bin (type: binary) outputColumnNames: bin + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 10 + native: false + projectedOutputColumns: [0] keys: bin (type: binary) mode: hash outputColumnNames: _col0, _col1 @@ -247,15 +347,41 @@ STAGE PLANS: key expressions: _col0 (type: binary) sort order: + Map-reduce partition columns: _col0 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0] keys: KEY._col0 (type: binary) mode: mergepartial outputColumnNames: _col0, _col1 @@ -263,9 +389,16 @@ STAGE PLANS: Select Operator expressions: _col1 (type: bigint), _col0 (type: binary) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -316,14 +449,18 @@ POSTHOOK: Input: default@hundredorc 3 xylophone band 2 yard duty 3 zync studies -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t1.i, t1.bin, t2.bin FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -341,12 +478,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -354,6 +502,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 @@ -361,9 +513,16 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10, 11] Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -371,26 +530,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10] Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_bround.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_bround.q.out b/ql/src/test/results/clientpositive/llap/vector_bround.q.out index 431b91a..d463f1a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_bround.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_bround.q.out @@ -32,19 +32,22 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@test_vector_bround POSTHOOK: Lineage: test_vector_bround.v0 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: test_vector_bround.v1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround +PREHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY -POSTHOOK: query: explain select bround(v0), bround(v1, 1) from test_vector_bround +POSTHOOK: query: explain vectorization select bround(v0), bround(v1, 1) from test_vector_bround POSTHOOK: type: QUERY Plan optimized by CBO. Stage-0 Fetch Operator limit:-1 - Select Operator [SEL_1] - Output:["_col0","_col1"] - TableScan [TS_0] - Output:["v0","v1"] + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=8 width=16) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=8 width=16) + default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:NONE,Output:["v0","v1"] PREHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_bucket.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_bucket.q.out b/ql/src/test/results/clientpositive/llap/vector_bucket.q.out index 0573abf..7917ffd 100644 --- a/ql/src/test/results/clientpositive/llap/vector_bucket.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_bucket.q.out @@ -6,12 +6,16 @@ POSTHOOK: query: CREATE TABLE non_orc_table(a INT, b STRING) CLUSTERED BY(a) INT POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@non_orc_table -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert into table non_orc_table values(1, 'one'),(1, 'one'), (2, 'two'),(3, 'three') POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -42,15 +46,34 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Select Operator expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 1] + selectExpressions: VectorUDFAdaptor(UDFToInteger(VALUE._col0)) -> 2:int Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index 9a2456f..31452c2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -95,20 +95,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ] -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT i, AVG(CAST(50 AS INT)) AS `avg_int_ok`, AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`, AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok` FROM over1korc GROUP BY i ORDER BY i LIMIT 10 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -127,12 +131,27 @@ STAGE PLANS: TableScan alias: over1korc Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] Select Operator expressions: i (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: avg(50), avg(50.0), avg(50) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct<count:bigint,sum:double>, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct<count:bigint,sum:double>, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct<count:bigint,sum:decimal(20,0)> + className: VectorGroupByOperator + vectorOutput: false + keyExpressions: col 2 + native: false + projectedOutputColumns: [0, 1, 2] + vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct<count:bigint,sum:decimal(20,0)> output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -146,8 +165,21 @@ STAGE PLANS: value expressions: _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: struct<count:bigint,sum:decimal(12,0),input:decimal(10,0)>) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: false + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:int> of Column[VALUE._col0] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) @@ -163,16 +195,33 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_char_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_char_2.q.out b/ql/src/test/results/clientpositive/llap/vector_char_2.q.out index bdcf7d8..3da086a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -47,18 +47,22 @@ val_10 10 1 val_100 200 2 val_103 206 2 val_104 208 2 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value asc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -77,12 +81,27 @@ STAGE PLANS: TableScan alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: char(20)), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:int Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -91,16 +110,43 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: + Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -108,21 +154,43 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -177,18 +245,22 @@ val_97 194 2 val_96 96 1 val_95 190 2 val_92 92 1 -PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +PREHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 PREHOOK: type: QUERY -POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +POSTHOOK: query: explain vectorization expression select value, sum(cast(key as int)), count(*) numrows from char_2 group by value order by value desc limit 5 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -207,12 +279,27 @@ STAGE PLANS: TableScan alias: char_2 Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: value (type: char(20)), UDFToInteger(key) (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2] + selectExpressions: VectorUDFAdaptor(UDFToInteger(key)) -> 2:int Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1 + native: false + projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -221,16 +308,43 @@ STAGE PLANS: key expressions: _col0 (type: char(20)) sort order: - Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 0 + native: false + projectedOutputColumns: [0, 1] keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -238,21 +352,43 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_char_4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_char_4.q.out b/ql/src/test/results/clientpositive/llap/vector_char_4.q.out index 6d55ab0..d164ebe 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_4.q.out @@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10 POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@char_lazy_binary_columnar -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -143,12 +147,23 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] + selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -157,6 +172,14 @@ STAGE PLANS: name: default.char_lazy_binary_columnar Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-2 Dependency Collection http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out b/ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out index f2a386e..67b575c 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out @@ -124,10 +124,14 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@char_join1_str_orc POSTHOOK: Lineage: char_join1_str_orc.c1 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: char_join1_str_orc.c2 SIMPLE [(char_join1_str)char_join1_str.FieldSchema(name:c2, type:string, comment:null), ] -PREHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc1_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -146,12 +150,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -159,6 +174,10 @@ STAGE PLANS: keys: 0 _col1 (type: char(10)) 1 _col1 (type: char(10)) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 @@ -166,39 +185,89 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(10)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: char(10)) sort order: + Map-reduce partition columns: _col1 (type: char(10)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(10)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -224,10 +293,14 @@ POSTHOOK: Input: default@char_join1_vc1_orc 2 abc 1 abc 2 abc 2 abc 3 abc 3 abc -PREHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_vc2_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -246,32 +319,66 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: char(20)) sort order: + Map-reduce partition columns: _col1 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(20)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -279,6 +386,10 @@ STAGE PLANS: keys: 0 _col1 (type: char(20)) 1 _col1 (type: char(20)) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 @@ -286,19 +397,46 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: char(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: char(20)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -326,10 +464,14 @@ POSTHOOK: Input: default@char_join1_vc2_orc 2 abc 1 abc 2 abc 2 abc 3 abc 3 abc -PREHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +PREHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: query: explain vectorization expression select * from char_join1_vc1_orc a join char_join1_str_orc b on (a.c2 = b.c2) order by a.c1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -348,12 +490,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: char(10)) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -361,6 +514,11 @@ STAGE PLANS: keys: 0 UDFToString(_col1) (type: string) 1 _col1 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: CastStringGroupToString(col 1) -> 2:String + className: VectorMapJoinInnerStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 @@ -368,39 +526,89 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: char(10)), _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: c2 is not null (type: boolean) Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: int), c2 (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 273 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: char(10)), VALUE._col1 (type: int), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 323 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
