http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join30.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out b/ql/src/test/results/clientpositive/llap/vector_join30.q.out index 4b2f06f..9238bc7 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -10,7 +10,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc_n0 POSTHOOK: Lineage: orcsrc_n0.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc_n0.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -18,7 +18,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -51,6 +51,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -72,6 +73,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -86,6 +88,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -94,6 +102,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -116,9 +125,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -141,6 +156,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -154,14 +170,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -192,26 +221,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -219,7 +229,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -251,6 +261,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -266,9 +277,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 3:string, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 1 Map 3 @@ -291,6 +308,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -304,6 +322,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, string, bigint] Map 3 Map Operator Tree: TableScan @@ -311,6 +335,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -325,8 +350,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -340,14 +367,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -378,26 +418,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x RIGHT OUTER JOIN @@ -405,7 +426,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x RIGHT OUTER JOIN @@ -437,6 +458,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -451,6 +473,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -465,6 +488,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -472,6 +501,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -487,9 +517,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -512,6 +547,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -525,14 +561,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -563,26 +612,196 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM +PREHOOK: query: explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM +POSTHOOK: query: explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96342 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -593,7 +812,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -629,6 +848,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -650,6 +870,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -664,6 +885,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -672,6 +899,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -725,6 +953,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -738,6 +967,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -746,6 +981,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -767,6 +1003,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -781,14 +1018,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -819,32 +1069,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -855,7 +1080,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -890,6 +1115,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -904,6 +1130,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -918,6 +1145,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -925,6 +1158,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -939,8 +1173,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -954,6 +1190,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -961,6 +1203,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -975,6 +1218,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -989,6 +1233,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1011,14 +1261,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1049,32 +1309,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -1085,7 +1320,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -1120,6 +1355,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1134,6 +1370,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1148,6 +1385,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1155,6 +1398,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1169,8 +1413,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1184,6 +1430,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1191,6 +1443,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1205,6 +1458,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1219,6 +1473,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1241,14 +1501,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1279,32 +1549,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -1315,7 +1560,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -1350,6 +1595,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1364,6 +1610,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1378,6 +1625,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1385,6 +1638,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1399,8 +1653,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1414,6 +1670,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1421,6 +1683,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1435,6 +1698,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1449,6 +1713,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1471,14 +1741,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1509,32 +1789,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x RIGHT OUTER JOIN @@ -1545,7 +1800,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x RIGHT OUTER JOIN @@ -1580,6 +1835,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1594,6 +1850,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1608,6 +1865,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1615,6 +1878,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1629,8 +1893,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1644,6 +1910,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1651,6 +1923,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1665,6 +1938,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE @@ -1679,6 +1953,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1701,14 +1981,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1739,28 +2029,1443 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM +PREHOOK: query: explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc_n0 -#### A masked pattern was here #### -POSTHOOK: query: FROM +POSTHOOK: query: explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Full Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +FROM +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc_n0 + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial +
<TRUNCATED>