[18/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

mmccline Sun, 16 Sep 2018 07:05:45 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out
index 12db036..b8d76ed 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out
@@ -47,15 +47,167 @@ POSTHOOK: Input: default@myinput1_n4
 #### A masked pattern was here ####
 13630578
 Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 2' is a cross product
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 
a RIGHT OUTER JOIN myinput1_n4 b
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b
 PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM 
myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-13630578
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 2 <- Map 1 (BROADCAST_EDGE)
+        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkEmptyKeyOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: int), _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Right Outer Join 0 to 1
+                      filter predicates:
+                        0 
+                        1 {true}
+                      keys:
+                        0 
+                        1 
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterFilteredOperator
+                          native: false
+                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
+                          nativeConditionsNotMet: Outer Join has keys IS false
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      input vertices:
+                        0 Map 1
+                      Statistics: Num rows: 9 Data size: 153 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+                        outputColumnNames: _col0
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            selectExpressions: 
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 4:int
+                        Statistics: Num rows: 9 Data size: 153 Basic stats: 
COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: sum(_col0)
+                          Group By Vectorization:
+                              className: VectorGroupByOperator
+                              groupByMode: HASH
+                              native: false
+                              vectorProcessingMode: HASH
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Reduce Sink Vectorization:
+                                className: VectorReduceSinkEmptyKeyOperator
+                                native: true
+                                nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 
a JOIN myinput1_n4 b ON a.key = b.value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1_n4
@@ -128,42 +280,643 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1_n4
 #### A masked pattern was here ####
 4542003
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 
a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.value
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b ON a.key = b.value
 PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM 
myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b ON a.key = b.value
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-3079923
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 
a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.key
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 2 <- Map 1 (BROADCAST_EDGE)
+        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Right Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: int)
+                        1 _col1 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      input vertices:
+                        0 Map 1
+                      Statistics: Num rows: 3 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+                        outputColumnNames: _col0
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            selectExpressions: 
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+                        Statistics: Num rows: 3 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: sum(_col0)
+                          Group By Vectorization:
+                              className: VectorGroupByOperator
+                              groupByMode: HASH
+                              native: false
+                              vectorProcessingMode: HASH
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Reduce Sink Vectorization:
+                                className: VectorReduceSinkEmptyKeyOperator
+                                native: true
+                                nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b ON a.key = b.key
 PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM 
myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key = b.key
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b ON a.key = b.key
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-4509891
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 
a RIGHT OUTER JOIN myinput1_n4 b ON a.value = b.value
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 2 <- Map 1 (BROADCAST_EDGE)
+        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Right Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: int)
+                        1 _col0 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      input vertices:
+                        0 Map 1
+                      Statistics: Num rows: 3 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+                        outputColumnNames: _col0
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            selectExpressions: 
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+                        Statistics: Num rows: 3 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: sum(_col0)
+                          Group By Vectorization:
+                              className: VectorGroupByOperator
+                              groupByMode: HASH
+                              native: false
+                              vectorProcessingMode: HASH
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Reduce Sink Vectorization:
+                                className: VectorReduceSinkEmptyKeyOperator
+                                native: true
+                                nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b ON a.value = b.value
 PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM 
myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.value = b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b ON a.value = b.value
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-3113558
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 
a RIGHT OUTER JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 2 <- Map 1 (BROADCAST_EDGE)
+        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col1 (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Right Outer Join 0 to 1
+                      keys:
+                        0 _col1 (type: int)
+                        1 _col1 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      input vertices:
+                        0 Map 1
+                      Statistics: Num rows: 3 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+                        outputColumnNames: _col0
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            selectExpressions: 
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+                        Statistics: Num rows: 3 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: sum(_col0)
+                          Group By Vectorization:
+                              className: VectorGroupByOperator
+                              groupByMode: HASH
+                              native: false
+                              vectorProcessingMode: HASH
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Reduce Sink Vectorization:
+                                className: VectorReduceSinkEmptyKeyOperator
+                                native: true
+                                nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value
 PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM 
myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b ON a.key=b.key and a.value = 
b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b ON a.key=b.key and a.value = b.value
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n4
-#### A masked pattern was here ####
-3079923
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 2 <- Map 1 (BROADCAST_EDGE)
+        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col1 
(type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkMultiKeyOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Right Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: int), _col1 (type: int)
+                        1 _col0 (type: int), _col1 (type: int)
+                      Map Join Vectorization:
+                          className: VectorMapJoinOuterMultiKeyOperator
+                          native: true
+                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      input vertices:
+                        0 Map 1
+                      Statistics: Num rows: 3 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+                        outputColumnNames: _col0
+                        Select Vectorization:
+                            className: VectorSelectOperator
+                            native: true
+                            selectExpressions: 
VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int
+                        Statistics: Num rows: 3 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: sum(_col0)
+                          Group By Vectorization:
+                              className: VectorGroupByOperator
+                              groupByMode: HASH
+                              native: false
+                              vectorProcessingMode: HASH
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Reduce Sink Vectorization:
+                                className: VectorReduceSinkEmptyKeyOperator
+                                native: true
+                                nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n4 
a LEFT OUTER JOIN myinput1_n4 b ON (a.value=b.value) RIGHT OUTER JOIN 
myinput1_n4 c ON (b.value=c.value)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1_n4


http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
index 55be910..2438603 100644
--- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out
@@ -65,6 +65,7 @@ POSTHOOK: Output: default@tjoin1
 POSTHOOK: Lineage: tjoin1.c1 SIMPLE 
[(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin1.c2 EXPRESSION 
[(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin1.rnum SIMPLE 
[(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ]
+_col0  _col1   _col2
 PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tjoin2stage
@@ -76,12 +77,14 @@ POSTHOOK: Output: default@tjoin2
 POSTHOOK: Lineage: tjoin2.c1 SIMPLE 
[(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin2.c2 SIMPLE 
[(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin2.rnum SIMPLE 
[(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ]
-PREHOOK: query: explain vectorization expression
+tjoin2stage.rnum       tjoin2stage.c1  tjoin2stage.c2
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: false
   enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
@@ -167,15 +170,21 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum    tjoin1.c1       tjoin1.c2       c2j2
 0      10      15      NULL
 1      20      25      NULL
 2      NULL    50      NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -257,15 +266,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum    tjoin1.c1       tjoin1.c2       c2j2
 0      10      15      NULL
 1      20      25      NULL
 2      NULL    50      NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: true
   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -289,6 +300,7 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 
2:c2:int, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: 
int)
                     outputColumnNames: _col0, _col1, _col2
@@ -346,6 +358,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: rnum:int, c1:int, c2:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -353,6 +371,7 @@ STAGE PLANS:
                   Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE 
Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 
2:c2:char(2), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: c1 (type: int), c2 (type: char(2))
                     outputColumnNames: _col0, _col1
@@ -367,8 +386,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: int)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
+                          keyColumns: 1:int
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 2:char(2)
                       Statistics: Num rows: 4 Data size: 360 Basic stats: 
COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: char(2))
             Execution mode: vectorized, llap
@@ -382,6 +403,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [1, 2]
+                    dataColumns: rnum:int, c1:int, c2:char(2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
 
   Stage: Stage-0
     Fetch Operator
@@ -399,15 +426,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum    tjoin1.c1       tjoin1.c2       c2j2
 0      10      15      NULL
 1      20      25      NULL
 2      NULL    50      NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: true
   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -431,6 +460,7 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 
2:c2:int, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: 
int)
                     outputColumnNames: _col0, _col1, _col2
@@ -488,6 +518,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: rnum:int, c1:int, c2:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -495,6 +531,7 @@ STAGE PLANS:
                   Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE 
Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 
2:c2:char(2), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: c1 (type: int), c2 (type: char(2))
                     outputColumnNames: _col0, _col1
@@ -509,8 +546,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: int)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
+                          keyColumns: 1:int
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 2:char(2)
                       Statistics: Num rows: 4 Data size: 360 Basic stats: 
COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: char(2))
             Execution mode: vectorized, llap
@@ -524,6 +563,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [1, 2]
+                    dataColumns: rnum:int, c1:int, c2:char(2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
 
   Stage: Stage-0
     Fetch Operator
@@ -541,15 +586,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum    tjoin1.c1       tjoin1.c2       c2j2
 0      10      15      NULL
 1      20      25      NULL
 2      NULL    50      NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: true
   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -573,6 +620,7 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 
2:c2:int, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: 
int)
                     outputColumnNames: _col0, _col1, _col2
@@ -591,9 +639,16 @@ STAGE PLANS:
                         0 _col1 (type: int)
                         1 _col0 (type: int)
                       Map Join Vectorization:
+                          bigTableFilterExpressions: 
FilterLongColGreaterLongScalar(col 2:int, val 15)
+                          bigTableKeyColumns: 1:int
+                          bigTableRetainColumnNums: [0, 1, 2]
+                          bigTableValueColumns: 0:int, 1:int, 2:int
                           className: VectorMapJoinOuterLongOperator
                           native: true
                           nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                          projectedOutput: 0:int, 1:int, 2:int, 4:char(2)
+                          smallTableValueMapping: 4:char(2)
+                          hashTableImplementationType: OPTIMIZED
                       outputColumnNames: _col0, _col1, _col2, _col4
                       input vertices:
                         1 Map 2
@@ -627,6 +682,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: rnum:int, c1:int, c2:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -634,6 +695,7 @@ STAGE PLANS:
                   Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE 
Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 
2:c2:char(2), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: c1 (type: int), c2 (type: char(2))
                     outputColumnNames: _col0, _col1
@@ -648,8 +710,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: int)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
+                          keyColumns: 1:int
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 2:char(2)
                       Statistics: Num rows: 4 Data size: 360 Basic stats: 
COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: char(2))
             Execution mode: vectorized, llap
@@ -663,6 +727,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [1, 2]
+                    dataColumns: rnum:int, c1:int, c2:char(2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
 
   Stage: Stage-0
     Fetch Operator
@@ -680,15 +750,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum    tjoin1.c1       tjoin1.c2       c2j2
 0      10      15      NULL
 1      20      25      NULL
 2      NULL    50      NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left 
outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: true
   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -712,6 +784,7 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE 
Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 
2:c2:int, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: 
int)
                     outputColumnNames: _col0, _col1, _col2
@@ -730,9 +803,16 @@ STAGE PLANS:
                         0 _col1 (type: int)
                         1 _col0 (type: int)
                       Map Join Vectorization:
+                          bigTableFilterExpressions: 
FilterLongColGreaterLongScalar(col 2:int, val 15)
+                          bigTableKeyColumns: 1:int
+                          bigTableRetainColumnNums: [0, 1, 2]
+                          bigTableValueColumns: 0:int, 1:int, 2:int
                           className: VectorMapJoinOuterLongOperator
                           native: true
                           nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                          projectedOutput: 0:int, 1:int, 2:int, 4:char(2)
+                          smallTableValueMapping: 4:char(2)
+                          hashTableImplementationType: OPTIMIZED
                       outputColumnNames: _col0, _col1, _col2, _col4
                       input vertices:
                         1 Map 2
@@ -766,6 +846,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: rnum:int, c1:int, c2:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -773,6 +859,7 @@ STAGE PLANS:
                   Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE 
Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 
2:c2:char(2), 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: c1 (type: int), c2 (type: char(2))
                     outputColumnNames: _col0, _col1
@@ -787,8 +874,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: int)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkLongOperator
+                          keyColumns: 1:int
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 2:char(2)
                       Statistics: Num rows: 4 Data size: 360 Basic stats: 
COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: char(2))
             Execution mode: vectorized, llap
@@ -802,6 +891,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [1, 2]
+                    dataColumns: rnum:int, c1:int, c2:char(2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
 
   Stage: Stage-0
     Fetch Operator
@@ -819,6 +914,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum    tjoin1.c1       tjoin1.c2       c2j2
 0      10      15      NULL
 1      20      25      NULL
 2      NULL    50      NULL

[18/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

Reply via email to