http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out index cf90430..06e30d8 100644 --- a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out @@ -49,15 +49,11 @@ POSTHOOK: Output: default@myinput1 POSTHOOK: Lineage: myinput1.key SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1.value SIMPLE [(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ] PREHOOK: query: -- merging -explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value +explain select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value +explain select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -75,20 +71,12 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 value (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6 input vertices: @@ -97,16 +85,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -114,42 +95,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -176,14 +134,10 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -201,14 +155,7 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -219,11 +166,6 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: 1 Map 2 @@ -232,16 +174,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -249,84 +184,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -344,14 +233,10 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -369,9 +254,6 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -380,11 +262,6 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -394,16 +271,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -411,70 +281,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -519,14 +351,10 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -544,14 +372,7 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -562,11 +383,6 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -576,16 +392,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -593,82 +402,36 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -686,14 +449,10 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -711,9 +470,6 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -722,11 +478,6 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -736,16 +487,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -753,68 +497,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -938,15 +644,11 @@ NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL PREHOOK: query: -- merging -explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value +explain select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: -- merging -explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value +explain select * from myinput1 a join myinput1 b on a.key<=>b.value POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -964,20 +666,12 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 value (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6 input vertices: @@ -986,16 +680,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1003,42 +690,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -1065,14 +729,10 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1090,14 +750,7 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -1108,11 +761,6 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: One MapJoin Condition IS false outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: 1 Map 2 @@ -1121,16 +769,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1138,84 +779,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -1233,14 +828,10 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 -PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1258,9 +849,6 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -1269,11 +857,6 @@ STAGE PLANS: 0 key (type: int) 1 value (type: int) 2 key (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -1283,16 +866,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1300,70 +876,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int) sort order: + Map-reduce partition columns: value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -1408,14 +946,10 @@ NULL NULL 48 NULL NULL NULL NULL NULL NULL NULL NULL 10 NULL NULL NULL NULL NULL 35 NULL NULL NULL NULL NULL NULL -PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value=b.key join myinput1 c on a.key<=>c.key AND a.value=c.value POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1433,14 +967,7 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -1451,11 +978,6 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, false] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -1465,16 +987,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1482,82 +997,36 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1) -> boolean predicate: value is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator @@ -1575,14 +1044,10 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 -PREHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value +POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1600,9 +1065,6 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Map Join Operator condition map: Inner Join 0 to 1 @@ -1611,11 +1073,6 @@ STAGE PLANS: 0 key (type: int), value (type: int) 1 value (type: int), key (type: int) 2 key (type: int), value (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true - nativeConditionsNotMet: One MapJoin Condition IS false, No nullsafe IS false nullSafes: [true, true] outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 input vertices: @@ -1625,16 +1082,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1642,68 +1092,30 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: value (type: int), key (type: int) sort order: ++ Map-reduce partition columns: value (type: int), key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1] Reduce Output Operator key expressions: key (type: int), value (type: int) sort order: ++ Map-reduce partition columns: key (type: int), value (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out b/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index c4bbeff..6479ec5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -103,7 +103,7 @@ scratch.t scratch.si scratch.i scratch.b scratch.f scratch.d scratch.dc PREHOOK: query: -- -- Projection LongCol<Compare>LongScalar -- -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q @@ -111,16 +111,12 @@ PREHOOK: type: QUERY POSTHOOK: query: -- -- Projection LongCol<Compare>LongScalar -- -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN SELECT sum(hash(*)) FROM (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc order by t, si, i) as q POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -138,45 +134,19 @@ STAGE PLANS: TableScan alias: vectortab2k_orc Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), (t < 0) (type: boolean), (si <= 0) (type: boolean), (i = 0) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 7, 8, 9] - selectExpressions: LongColLessLongScalar(col 0, val 0) -> 7:long, LongColLessEqualLongScalar(col 1, val 0) -> 8:long, LongColEqualLongScalar(col 2, val 0) -> 9:long Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFHash(Column[KEY.reducesinkkey0], Column[KEY.reducesinkkey1], Column[KEY.reducesinkkey2], Column[VALUE._col0], Column[VALUE._col1], Column[VALUE._col2]) not supported - vectorized: false Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) @@ -215,21 +185,17 @@ POSTHOOK: Input: default@vectortab2k_orc #### A masked pattern was here #### c0 -3601806268 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN SELECT sum(hash(*)) FROM (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN SELECT sum(hash(*)) FROM (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc order by t, si, i, b) as q POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -247,45 +213,19 @@ STAGE PLANS: TableScan alias: vectortab2k_orc Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), (t > 0) (type: boolean), (si >= 0) (type: boolean), (i <> 0) (type: boolean), (b > 0) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2, 3, 7, 8, 9, 10] - selectExpressions: LongColGreaterLongScalar(col 0, val 0) -> 7:long, LongColGreaterEqualLongScalar(col 1, val 0) -> 8:long, LongColNotEqualLongScalar(col 2, val 0) -> 9:long, LongColGreaterLongScalar(col 3, val 0) -> 10:long Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) sort order: ++++ - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Execution mode: llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Select expression for SELECT operator: UDF GenericUDFHash(Column[KEY.reducesinkkey0], Column[KEY.reducesinkkey1], Column[KEY.reducesinkkey2], Column[KEY.reducesinkkey3], Column[VALUE._col0], Column[VALUE._col1], Column[VALUE._col2], Column[VALUE._col3]) not supported - vectorized: false Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int)