[20/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

mmccline Sun, 16 Sep 2018 07:05:45 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join30.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
index 4b2f06f..9238bc7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
@@ -10,7 +10,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc_n0
 POSTHOOK: Lineage: orcsrc_n0.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc_n0.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -18,7 +18,7 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -51,6 +51,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -72,6 +73,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
+                            keyColumns: 0:string
                             native: true
                             nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 475 Data size: 83204 Basic 
stats: COMPLETE Column stats: NONE
@@ -86,6 +88,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -94,6 +102,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -116,9 +125,15 @@ STAGE PLANS:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
                         Map Join Vectorization:
+                            bigTableKeyColumns: 0:string
+                            bigTableRetainColumnNums: [0, 1]
+                            bigTableValueColumns: 0:string, 1:string
                             className: VectorMapJoinInnerBigOnlyStringOperator
                             native: true
                             nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
+                            nonOuterSmallTableKeyMapping: []
+                            projectedOutput: 0:string, 1:string
+                            hashTableImplementationType: OPTIMIZED
                         outputColumnNames: _col2, _col3
                         input vertices:
                           0 Map 1
@@ -141,6 +156,7 @@ STAGE PLANS:
                                 className: VectorReduceSinkEmptyKeyOperator
                                 native: true
                                 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                                valueColumns: 0:bigint
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
@@ -154,14 +170,27 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: true
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -192,26 +221,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: FROM 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value))
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-103231310608
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -219,7 +229,7 @@ LEFT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -251,6 +261,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -266,9 +277,15 @@ STAGE PLANS:
                         0 _col0 (type: string)
                         1 _col0 (type: string)
                       Map Join Vectorization:
+                          bigTableKeyColumns: 0:string
+                          bigTableRetainColumnNums: []
                           className: VectorMapJoinOuterStringOperator
                           native: true
                           nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                          outerSmallTableKeyMapping: 0 -> 3
+                          projectedOutput: 3:string, 4:string
+                          smallTableValueMapping: 4:string
+                          hashTableImplementationType: OPTIMIZED
                       outputColumnNames: _col2, _col3
                       input vertices:
                         1 Map 3
@@ -291,6 +308,7 @@ STAGE PLANS:
                               className: VectorReduceSinkEmptyKeyOperator
                               native: true
                               nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: 0:bigint
                           Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                           value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
@@ -304,6 +322,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: true
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string, string, bigint]
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -311,6 +335,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -325,8 +350,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
                       Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
@@ -340,14 +367,27 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -378,26 +418,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: FROM 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value))
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-103231310608
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 RIGHT OUTER JOIN
@@ -405,7 +426,7 @@ RIGHT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 RIGHT OUTER JOIN
@@ -437,6 +458,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -451,6 +473,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -465,6 +488,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -472,6 +501,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -487,9 +517,14 @@ STAGE PLANS:
                         0 _col0 (type: string)
                         1 _col0 (type: string)
                       Map Join Vectorization:
+                          bigTableKeyColumns: 0:string
+                          bigTableRetainColumnNums: [0, 1]
+                          bigTableValueColumns: 0:string, 1:string
                           className: VectorMapJoinOuterStringOperator
                           native: true
                           nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
+                          projectedOutput: 0:string, 1:string
+                          hashTableImplementationType: OPTIMIZED
                       outputColumnNames: _col2, _col3
                       input vertices:
                         0 Map 1
@@ -512,6 +547,7 @@ STAGE PLANS:
                               className: VectorReduceSinkEmptyKeyOperator
                               native: true
                               nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: 0:bigint
                           Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                           value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
@@ -525,14 +561,27 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: true
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -563,26 +612,196 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: FROM 
+PREHOOK: query: explain vectorization detail
+FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-RIGHT OUTER JOIN 
+FULL OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM 
+POSTHOOK: query: explain vectorization detail
+FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-RIGHT OUTER JOIN 
+FULL OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
-103231310608
-PREHOOK: query: explain vectorization expression
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc_n0
+                  Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc_n0
+                  Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
+                      Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col2, _col3
+                Statistics: Num rows: 550 Data size: 96342 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -593,7 +812,7 @@ JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -629,6 +848,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -650,6 +870,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
+                            keyColumns: 0:string
                             native: true
                             nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 475 Data size: 83204 Basic 
stats: COMPLETE Column stats: NONE
@@ -664,6 +885,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -672,6 +899,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -725,6 +953,7 @@ STAGE PLANS:
                                 className: VectorReduceSinkEmptyKeyOperator
                                 native: true
                                 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                                valueColumns: 0:bigint
                             Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                             value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
@@ -738,6 +967,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: true
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -746,6 +981,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Filter Operator
                     Filter Vectorization:
                         className: VectorFilterOperator
@@ -767,6 +1003,7 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Reduce Sink Vectorization:
                             className: VectorReduceSinkStringOperator
+                            keyColumns: 0:string
                             native: true
                             nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 475 Data size: 83204 Basic 
stats: COMPLETE Column stats: NONE
@@ -781,14 +1018,27 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -819,32 +1069,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
-ON (x.key = Z.key)
-select sum(hash(Y.key,Y.value))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
-ON (x.key = Z.key)
-select sum(hash(Y.key,Y.value))
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-348019368476
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -855,7 +1080,7 @@ LEFT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -890,6 +1115,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -904,6 +1130,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -918,6 +1145,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -925,6 +1158,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -939,8 +1173,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
                       Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
@@ -954,6 +1190,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -961,6 +1203,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -975,6 +1218,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -989,6 +1233,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1011,14 +1261,24 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                     value expressions: _col0 (type: bigint)
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -1049,32 +1309,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
-ON (x.key = Z.key)
-select sum(hash(Y.key,Y.value))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
-ON (x.key = Z.key)
-select sum(hash(Y.key,Y.value))
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-348019368476
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -1085,7 +1320,7 @@ LEFT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -1120,6 +1355,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -1134,6 +1370,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -1148,6 +1385,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -1155,6 +1398,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -1169,8 +1413,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
                       Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
@@ -1184,6 +1430,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -1191,6 +1443,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -1205,6 +1458,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -1219,6 +1473,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1241,14 +1501,24 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                     value expressions: _col0 (type: bigint)
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -1279,32 +1549,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
-ON (x.key = Z.key)
-select sum(hash(Y.key,Y.value))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
-ON (x.key = Z.key)
-select sum(hash(Y.key,Y.value))
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-348019368476
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -1315,7 +1560,7 @@ RIGHT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -1350,6 +1595,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -1364,6 +1610,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -1378,6 +1625,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -1385,6 +1638,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -1399,8 +1653,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
                       Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
@@ -1414,6 +1670,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -1421,6 +1683,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -1435,6 +1698,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -1449,6 +1713,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1471,14 +1741,24 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                     value expressions: _col0 (type: bigint)
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -1509,32 +1789,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-RIGHT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
-ON (x.key = Z.key)
-select sum(hash(Y.key,Y.value))
-PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-RIGHT OUTER JOIN
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
-ON (x.key = Z.key)
-select sum(hash(Y.key,Y.value))
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-348019368476
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 RIGHT OUTER JOIN
@@ -1545,7 +1800,7 @@ RIGHT OUTER JOIN
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 RIGHT OUTER JOIN
@@ -1580,6 +1835,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -1594,6 +1850,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -1608,6 +1865,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -1615,6 +1878,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
@@ -1629,8 +1893,10 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
                       Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
@@ -1644,6 +1910,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Map 5 
             Map Operator Tree:
                 TableScan
@@ -1651,6 +1923,7 @@ STAGE PLANS:
                   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string)
                     outputColumnNames: _col0
@@ -1665,6 +1938,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
                           native: true
                           nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
@@ -1679,6 +1953,12 @@ STAGE PLANS:
                 allNative: true
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1701,14 +1981,24 @@ STAGE PLANS:
                     sort order: 
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                     value expressions: _col0 (type: bigint)
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
         Reducer 3 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -1739,28 +2029,1443 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: FROM
+PREHOOK: query: explain vectorization detail
+FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-RIGHT OUTER JOIN
+JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
-RIGHT OUTER JOIN
+FULL OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-PREHOOK: Input: default@orcsrc_n0
-#### A masked pattern was here ####
-POSTHOOK: query: FROM
+POSTHOOK: query: explain vectorization detail
+FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-RIGHT OUTER JOIN
+JOIN
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+ON (x.key = Y.key)
+FULL OUTER JOIN
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 
(SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc_n0
+                  Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc_n0
+                  Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
+                      Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc_n0
+                  Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Full Outer Join 0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                outputColumnNames: _col2, _col3
+                Statistics: Num rows: 1100 Data size: 192684 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain vectorization detail
+FROM 
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+FULL OUTER JOIN
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+ON (x.key = Y.key)
+FULL OUTER JOIN
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+FROM 
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+FULL OUTER JOIN
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+ON (x.key = Y.key)
+FULL OUTER JOIN
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
+ON (x.key = Z.key)
+select sum(hash(Y.key,Y.value))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 
(SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc_n0
+                  Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc_n0
+                  Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumns: 1:string
+                      Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: orcsrc_n0
+                  Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumns: 0:string
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                     Full Outer Join 0 to 2
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                  2 _col0 (type: string)
+                outputColumnNames: _col2, _col3
+                Statistics: Num rows: 1100 Data size: 192684 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(hash(_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+            MergeJoin Vectorization:
+                enabled: false
+                enableConditionsNotMet: Vectorizing MergeJoin Supported IS 
false
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+


<TRUNCATED>

[20/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

Reply via email to