http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
index ca07200..d9e701a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
@@ -32,12 +32,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@values__tmp__table__2
POSTHOOK: Output: default@orc_table_2a
POSTHOOK: Lineage: orc_table_2a.c EXPRESSION
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where
t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where
t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -55,12 +59,23 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE
Column stats: NONE
Select Operator
expressions: c (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 1 Data size: 3 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -68,6 +83,13 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0]
+ className: VectorMapJoinInnerBigOnlyLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0]
outputColumnNames: _col1
input vertices:
1 Map 2
@@ -75,9 +97,16 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 1 Data size: 3 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 3 Basic stats:
COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -85,25 +114,66 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: c:int
+ partitionColumnCount: 0
Map 2
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
Select Operator
expressions: a (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: a:int
+ partitionColumnCount: 0
Stage: Stage-0
Fetch Operator
@@ -122,12 +192,16 @@ POSTHOOK: Input: default@orc_table_1a
POSTHOOK: Input: default@orc_table_2a
#### A masked pattern was here ####
3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c
where t2.c > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c
where t2.c > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -145,12 +219,23 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE
Column stats: NONE
Select Operator
expressions: c (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 1 Data size: 3 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -158,12 +243,23 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0]
+ bigTableValueColumns: [0]
+ className: VectorMapJoinLeftSemiLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0]
outputColumnNames: _col0
input vertices:
1 Map 2
Statistics: Num rows: 1 Data size: 3 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 3 Basic stats:
COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -171,19 +267,49 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: c:int
+ partitionColumnCount: 0
Map 2
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE
Column stats: NONE
Select Operator
expressions: a (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: NONE
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
@@ -192,9 +318,26 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 4 Basic stats:
COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: a:int
+ partitionColumnCount: 0
Stage: Stage-0
Fetch Operator
@@ -249,12 +392,16 @@ POSTHOOK: Input: default@values__tmp__table__4
POSTHOOK: Output: default@orc_table_2b
POSTHOOK: Lineage: orc_table_2b.c EXPRESSION
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
POSTHOOK: Lineage: orc_table_2b.v2 SIMPLE
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c
where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c
where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -272,12 +419,23 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: c (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -285,6 +443,14 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0]
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [2, 0]
+ smallTableMapping: [2]
outputColumnNames: _col1, _col2
input vertices:
1 Map 2
@@ -292,9 +458,16 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col2 (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 0]
Statistics: Num rows: 1 Data size: 100 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 100 Basic
stats: COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -302,26 +475,68 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: String
Map 2
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
Stage: Stage-0
Fetch Operator
@@ -340,12 +555,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
three 3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on
t1.a = t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on
t1.a = t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -363,32 +582,71 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
Map 2
Map Operator Tree:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -396,12 +654,24 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0, 1]
+ bigTableValueColumns: [0, 1]
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [2, 0, 0, 1]
+ smallTableMapping: [2]
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
0 Map 1
Statistics: Num rows: 1 Data size: 100 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 100 Basic stats:
COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -409,6 +679,20 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: String
Stage: Stage-0
Fetch Operator
@@ -427,12 +711,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
three 3 3 THREE
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1
on t1.a = t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1
on t1.a = t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -450,12 +738,23 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -463,6 +762,15 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0, 1]
+ bigTableValueColumns: [0, 1]
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0, 1, 2, 0]
+ smallTableMapping: [2]
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 2
@@ -470,9 +778,17 @@ STAGE PLANS:
Select Operator
expressions: _col2 (type: string), (_col3 * 2)
(type: int), (_col0 * 5) (type: int), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3, 4, 1]
+ selectExpressions: LongColMultiplyLongScalar(col
0, val 2) -> 3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long
Statistics: Num rows: 1 Data size: 100 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 100 Basic
stats: COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -480,26 +796,68 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: String, bigint, bigint
Map 2
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
Stage: Stage-0
Fetch Operator
@@ -518,12 +876,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
three 6 15 THREE
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a =
t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a =
t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -541,12 +903,23 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -554,6 +927,15 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0, 1]
+ bigTableValueColumns: [0, 1]
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [0, 1, 2]
+ smallTableMapping: [2]
outputColumnNames: _col0, _col1, _col2
input vertices:
1 Map 2
@@ -561,9 +943,16 @@ STAGE PLANS:
Select Operator
expressions: _col2 (type: string), _col1 (type:
string), _col0 (type: int)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 1, 0]
Statistics: Num rows: 1 Data size: 100 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 100 Basic
stats: COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -571,26 +960,68 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: String
Map 2
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
Stage: Stage-0
Fetch Operator
@@ -609,12 +1040,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
three THREE 3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a =
t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a =
t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -632,12 +1067,23 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -645,6 +1091,15 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0, 1]
+ bigTableValueColumns: [1]
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [1, 2, 0]
+ smallTableMapping: [2]
outputColumnNames: _col1, _col2, _col3
input vertices:
1 Map 2
@@ -652,9 +1107,16 @@ STAGE PLANS:
Select Operator
expressions: _col3 (type: int), _col2 (type:
string), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 2, 1]
Statistics: Num rows: 1 Data size: 100 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 100 Basic
stats: COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -662,26 +1124,68 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: String
Map 2
Map Operator Tree:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
Stage: Stage-0
Fetch Operator
@@ -700,12 +1204,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
3 three THREE
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a =
t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a =
t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -723,32 +1231,71 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
Map 2
Map Operator Tree:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -756,6 +1303,15 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0, 1]
+ bigTableValueColumns: [0, 1]
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [2, 0, 1]
+ smallTableMapping: [2]
outputColumnNames: _col0, _col2, _col3
input vertices:
0 Map 1
@@ -763,9 +1319,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col3 (type:
string), _col2 (type: int)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 1, 0]
Statistics: Num rows: 1 Data size: 100 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 100 Basic
stats: COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -773,6 +1336,20 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: String
Stage: Stage-0
Fetch Operator
@@ -791,12 +1368,16 @@ POSTHOOK: Input: default@orc_table_1b
POSTHOOK: Input: default@orc_table_2b
#### A masked pattern was here ####
three THREE 3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a =
t2.c where t1.a > 2
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a =
t2.c where t1.a > 2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -814,32 +1395,71 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
predicate: (a > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: v1 (type: string), a (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col0 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: v1:string, a:int
+ partitionColumnCount: 0
Map 2
Map Operator Tree:
TableScan
alias: t2
Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression:
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
predicate: (c > 2) (type: boolean)
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: c (type: int), v2 (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 1 Data size: 91 Basic stats:
COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -847,6 +1467,15 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: [0]
+ bigTableRetainedColumns: [0, 1]
+ bigTableValueColumns: [1]
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table,
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ projectedOutputColumns: [2, 0, 1]
+ smallTableMapping: [2]
outputColumnNames: _col0, _col1, _col3
input vertices:
0 Map 1
@@ -854,9 +1483,16 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: int), _col0 (type:
string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 2, 1]
Statistics: Num rows: 1 Data size: 100 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 100 Basic
stats: COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -864,6 +1500,20 @@ STAGE PLANS:
serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: c:int, v2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: String
Stage: Stage-0
Fetch Operator