http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index ef19bad..511bd79 100644 --- a/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -32,16 +32,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__2 POSTHOOK: Output: default@orc_table_2a POSTHOOK: Lineage: orc_table_2a.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -57,45 +53,18 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: c:int - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -108,23 +77,12 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -132,14 +90,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [0] - bigTableRetainedColumns: [0] - bigTableValueColumns: [0] - className: VectorMapJoinInnerBigOnlyLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [0] outputColumnNames: _col1 input vertices: 0 Map 1 @@ -147,35 +97,15 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: a:int - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -196,16 +126,12 @@ POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a #### A masked pattern was here #### 3 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -221,56 +147,23 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: a:int - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -283,23 +176,12 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -307,42 +189,18 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [0] - bigTableRetainedColumns: [0] - bigTableValueColumns: [0] - className: VectorMapJoinLeftSemiLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [0] outputColumnNames: _col0 input vertices: 1 Map 2 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: c:int - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -399,16 +257,12 @@ POSTHOOK: Input: default@values__tmp__table__4 POSTHOOK: Output: default@orc_table_2b POSTHOOK: Lineage: orc_table_2b.c EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -424,45 +278,18 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: v1:string, a:int - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -475,23 +302,12 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -499,14 +315,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [0] - bigTableRetainedColumns: [0] - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [2, 0] - smallTableMapping: [2] outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -514,36 +322,15 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [2, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: c:int, v2:string - partitionColumnCount: 0 - scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -564,16 +351,12 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -589,45 +372,18 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: c:int, v2:string - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -640,23 +396,12 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -664,44 +409,18 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [1] - bigTableRetainedColumns: [0, 1] - bigTableValueColumns: [0, 1] - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [0, 1, 1, 2] - smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: v1:string, a:int - partitionColumnCount: 0 - scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -722,16 +441,12 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 3 3 THREE -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -747,45 +462,18 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: v1:string, a:int - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -798,23 +486,12 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -822,15 +499,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [0] - bigTableRetainedColumns: [0, 1] - bigTableValueColumns: [0, 1] - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [0, 1, 2, 0] - smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -838,37 +506,15 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [2, 3, 4, 1] - selectExpressions: LongColMultiplyLongScalar(col 0, val 2) -> 3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: c:int, v2:string - partitionColumnCount: 0 - scratchColumnTypeNames: String, bigint, bigint Local Work: Map Reduce Local Work @@ -889,16 +535,12 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three 6 15 THREE -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -914,45 +556,18 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: v1:string, a:int - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -965,23 +580,12 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -989,15 +593,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [0] - bigTableRetainedColumns: [0, 1] - bigTableValueColumns: [0, 1] - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [0, 1, 2] - smallTableMapping: [2] outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -1005,36 +600,15 @@ STAGE PLANS: Select Operator expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [2, 1, 0] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: c:int, v2:string - partitionColumnCount: 0 - scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -1055,16 +629,12 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -1080,45 +650,18 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col1 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: v1:string, a:int - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -1131,23 +674,12 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1155,15 +687,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [0] - bigTableRetainedColumns: [0, 1] - bigTableValueColumns: [1] - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [1, 2, 0] - smallTableMapping: [2] outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1171,36 +694,15 @@ STAGE PLANS: Select Operator expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: c:int, v2:string - partitionColumnCount: 0 - scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -1221,16 +723,12 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### 3 three THREE -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -1246,45 +744,18 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: c:int, v2:string - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -1297,23 +768,12 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1321,15 +781,6 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [1] - bigTableRetainedColumns: [0, 1] - bigTableValueColumns: [0] - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [0, 1, 2] - smallTableMapping: [2] outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 @@ -1337,36 +788,15 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 2, 1] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: v1:string, a:int - partitionColumnCount: 0 - scratchColumnTypeNames: String Local Work: Map Reduce Local Work @@ -1387,16 +817,12 @@ POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b #### A masked pattern was here #### three THREE 3 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -1412,45 +838,18 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: c:int, v2:string - partitionColumnCount: 0 Local Work: Map Reduce Local Work @@ -1463,23 +862,12 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1487,15 +875,6 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyColumns: [1] - bigTableRetainedColumns: [0, 1] - bigTableValueColumns: [0, 1] - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - projectedOutputColumns: [0, 1, 2] - smallTableMapping: [2] outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 @@ -1503,36 +882,15 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0, 2] Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: v1:string, a:int - partitionColumnCount: 0 - scratchColumnTypeNames: String Local Work: Map Reduce Local Work
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index 91af229..c08fbda 100644 --- a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization +PREHOOK: query: explain select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -7,7 +7,7 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization +POSTHOOK: query: explain select count(*) from (select c.ctinyint from alltypesorc c left outer join alltypesorc cd @@ -16,10 +16,6 @@ left outer join alltypesorc hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -44,14 +40,6 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 4 @@ -68,14 +56,6 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -123,25 +103,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index f4a1e6e..012c3eb 100644 --- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain vectorization expression +explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -16,16 +16,12 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS -- Query copied from subquery_in.q -- non agg, non corr, with join in Parent Query -explain vectorization expression +explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -59,10 +55,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work Map 3 @@ -83,37 +75,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -165,10 +137,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work @@ -203,23 +171,19 @@ POSTHOOK: Input: default@lineitem 64128 9141 82704 7721 PREHOOK: query: -- non agg, corr, with join in Parent Query -explain vectorization expression +explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) PREHOOK: type: QUERY POSTHOOK: query: -- non agg, corr, with join in Parent Query -explain vectorization expression +explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -253,10 +217,6 @@ STAGE PLANS: keys: 0 _col0 (type: int), 1 (type: int) 1 _col0 (type: int), _col1 (type: int) - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work Map 3 @@ -277,37 +237,17 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 4 Execution mode: vectorized Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -359,10 +299,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Local Work: Map Reduce Local Work http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index cbf7d03..4710a73 100644 --- a/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -101,16 +101,12 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select bo, max(b) from vectortab2korc group by bo order by bo desc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select bo, max(b) from vectortab2korc group by bo order by bo desc POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,26 +124,12 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: bo (type: boolean), b (type: bigint) outputColumnNames: bo, b - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [7, 3] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(b) - Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 3) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 7 - native: false - projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash outputColumnNames: _col0, _col1 @@ -156,41 +138,14 @@ STAGE PLANS: key expressions: _col0 (type: boolean) sort order: + Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 1) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 @@ -198,36 +153,17 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: boolean) sort order: - - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat