http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
index 7c1780b..a49e8e2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
@@ -47,15 +47,174 @@ POSTHOOK: Input: default@myinput1_n1
#### A masked pattern was here ####
4937935
Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 2' is a cross product
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM
myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND
a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key
> 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM
myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND
a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key
> 40 AND b.value > 50 AND b.key = b.value
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-3080335
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: ((key > 40) and (value > 50) and (key = value))
(type: boolean)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicate: ((key = value) and (key > 40) and (value > 50))
(type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 4 Data size: 32 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ filter predicates:
+ 0
+ 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)}
+ keys:
+ 0
+ 1
+ Map Join Vectorization:
+ className: VectorMapJoinOuterFilteredOperator
+ native: false
+ nativeConditionsMet:
hive.mapjoin.optimized.hashtable IS true,
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS
true
+ nativeConditionsNotMet: Outer Join has keys IS false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 4 Data size: 68 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ selectExpressions:
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 4:int
+ Statistics: Num rows: 4 Data size: 68 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1
a JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND
a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1_n1
@@ -128,42 +287,681 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@myinput1_n1
#### A masked pattern was here ####
4937935
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1
a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value
> 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key
= a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM
myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40
AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key
= b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key
= a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-3080335
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1
a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value >
50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: ((key > 40) and (value > 50) and (key = value))
(type: boolean)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicate: ((key = value) and (key > 40) and (value > 50))
(type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 4 Data size: 32 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ filter predicates:
+ 0
+ 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)}
+ keys:
+ 0 _col0 (type: int)
+ 1 _col1 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet:
hive.mapjoin.optimized.hashtable IS true,
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized
Table and Supports Key Types IS true
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 4 Data size: 35 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ selectExpressions:
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+ Statistics: Num rows: 4 Data size: 35 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key =
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM
myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40
AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key
= b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key =
a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-3080335
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1
a RIGHT OUTER JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND
a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key =
b.value
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: ((key > 40) and (value > 50) and (key = value))
(type: boolean)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicate: ((key = value) and (key > 40) and (value > 50))
(type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 4 Data size: 32 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ filter predicates:
+ 0
+ 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)}
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet:
hive.mapjoin.optimized.hashtable IS true,
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized
Table and Supports Key Types IS true
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 4 Data size: 35 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ selectExpressions:
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+ Statistics: Num rows: 4 Data size: 35 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND
a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM
myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.value = b.value AND a.key >
40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND
b.key = b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND
a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-3080335
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1
a RIGHT OUTER JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key
> 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND
b.key = b.value
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: ((key > 40) and (value > 50) and (key = value))
(type: boolean)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicate: ((key = value) and (key > 40) and (value > 50))
(type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 4 Data size: 32 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ filter predicates:
+ 0
+ 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)}
+ keys:
+ 0 _col1 (type: int)
+ 1 _col1 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet:
hive.mapjoin.optimized.hashtable IS true,
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized
Table and Supports Key Types IS true
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 4 Data size: 35 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ selectExpressions:
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+ Statistics: Num rows: 4 Data size: 35 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND
a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key =
b.value
PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM
myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b ON a.key=b.key and a.value =
b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND
b.value > 50 AND b.key = b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER
JOIN myinput1_n1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND
a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key =
b.value
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n1
-#### A masked pattern was here ####
-3080335
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: ((key > 40) and (value > 50) and (key = value))
(type: boolean)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicate: ((key = value) and (key > 40) and (value > 50))
(type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1
(type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ Statistics: Num rows: 4 Data size: 32 Basic stats:
COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ filter predicates:
+ 0
+ 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)}
+ keys:
+ 0 _col0 (type: int), _col1 (type: int)
+ 1 _col0 (type: int), _col1 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinOuterMultiKeyOperator
+ native: true
+ nativeConditionsMet:
hive.mapjoin.optimized.hashtable IS true,
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized
Table and Supports Key Types IS true
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 4 Data size: 35 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ selectExpressions:
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+ Statistics: Num rows: 4 Data size: 35 Basic stats:
COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n1
a LEFT OUTER JOIN myinput1_n1 b ON (a.value=b.value AND a.key > 40 AND a.value
> 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
RIGHT OUTER JOIN myinput1_n1 c ON (b.value=c.value AND c.key > 40 AND c.value >
50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
PREHOOK: type: QUERY
PREHOOK: Input: default@myinput1_n1