[01/16] hive git commit: HIVE-20778: Join reordering may not be triggered if all joins in plan are created by decorrelation logic (Vineet Garg via Jesus Camacho Rodriguez)

jcamacho Sat, 03 Nov 2018 09:35:40 -0700

Repository: hive
Updated Branches:
  refs/heads/master 6dd01360a -> ae1eb15d4



http://git-wip-us.apache.org/repos/asf/hive/blob/ae1eb15d/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out 
b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
index 70cdd7a..1f60366 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
@@ -1,4 +1,6 @@
-Warning: Map Join MAPJOIN[42][bigTable=?] in task 'Stage-8:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[53][bigTable=?] in task 'Stage-7:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Stage-6:MAPRED' is a cross 
product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 
'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain vectorization expression
 select *
 from src
@@ -23,13 +25,17 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-4 is a root stage
-  Stage-10 depends on stages: Stage-4
-  Stage-8 depends on stages: Stage-10
-  Stage-7 depends on stages: Stage-5, Stage-8 , consists of Stage-9, Stage-2
-  Stage-9 has a backup stage: Stage-2
-  Stage-6 depends on stages: Stage-9
-  Stage-3 depends on stages: Stage-2, Stage-6
+  Stage-10 depends on stages: Stage-4 , consists of Stage-13, Stage-1
+  Stage-13 has a backup stage: Stage-1
+  Stage-9 depends on stages: Stage-13
+  Stage-8 depends on stages: Stage-1, Stage-5, Stage-9 , consists of Stage-11, 
Stage-12, Stage-2
+  Stage-11 has a backup stage: Stage-2
+  Stage-6 depends on stages: Stage-11
+  Stage-3 depends on stages: Stage-2, Stage-6, Stage-7
+  Stage-12 has a backup stage: Stage-2
+  Stage-7 depends on stages: Stage-12
   Stage-2
+  Stage-1
   Stage-5 is a root stage
   Stage-0 depends on stages: Stage-3
 
@@ -51,26 +57,27 @@ STAGE PLANS:
                   projectedOutputColumnNums: [0]
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
-                aggregations: count(), count(key)
                 Group By Vectorization:
-                    aggregators: VectorUDAFCountStar(*) -> bigint, 
VectorUDAFCount(col 0:string) -> bigint
                     className: VectorGroupByOperator
                     groupByMode: HASH
+                    keyExpressions: col 0:string
                     native: false
                     vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0, 1]
+                    projectedOutputColumnNums: []
+                keys: key (type: string)
                 mode: hash
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  sort order: 
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkOperator
                       native: false
                       nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
                       nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
-                  value expressions: _col0 (type: bigint), _col1 (type: bigint)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -87,95 +94,107 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0), count(VALUE._col1)
+          keys: KEY._col0 (type: string)
           mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), true (type: boolean)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-10
+    Conditional Operator
+
+  Stage: Stage-13
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:src 
+        $hdt$_0:$INTNAME 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:src 
+        $hdt$_0:$INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col0 (type: string)
+                1 _col0 (type: string)
+
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            TableScan Vectorization:
+                native: true
             Select Operator
               expressions: key (type: string), value (type: string)
               outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0, 1]
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              HashTable Sink Operator
+              Map Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
                 keys:
-                  0 
-                  1 
-
-  Stage: Stage-8
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-            Map Join Operator
-              condition map:
-                   Inner Join 0 to 1
-              keys:
-                0 
-                1 
-              Map Join Vectorization:
-                  bigTableValueExpressions: col 0:bigint, col 1:bigint
-                  className: VectorMapJoinOperator
-                  native: false
-                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Optimized Table and Supports Key Types IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE 
Column stats: NONE
-              File Output Operator
-                compressed: false
-                File Sink Vectorization:
-                    className: VectorFileSinkOperator
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                Map Join Vectorization:
+                    bigTableKeyExpressions: col 0:string
+                    bigTableValueExpressions: col 0:string, col 1:string
+                    className: VectorMapJoinOperator
                     native: false
-                table:
-                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer 
Join has keys IS true, Optimized Table and Supports Key Types IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          inputFormatFeatureSupport: [DECIMAL_64]
+          featureSupportInUse: [DECIMAL_64]
+          inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
       Local Work:
         Map Reduce Local Work
 
-  Stage: Stage-7
+  Stage: Stage-8
     Conditional Operator
 
-  Stage: Stage-9
+  Stage: Stage-11
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $INTNAME1 
+        $hdt$_0:$INTNAME1 
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $INTNAME1 
+        $hdt$_0:$INTNAME1 
           TableScan
             HashTable Sink Operator
               keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
+                0 
+                1 
 
   Stage: Stage-6
     Map Reduce
@@ -185,43 +204,50 @@ STAGE PLANS:
                 native: true
             Map Join Operator
               condition map:
-                   Left Outer Join 0 to 1
+                   Inner Join 0 to 1
               keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
+                0 
+                1 
               Map Join Vectorization:
-                  bigTableKeyExpressions: col 0:string
-                  bigTableValueExpressions: col 0:string, col 1:string, col 
2:bigint, col 3:bigint
+                  bigTableValueExpressions: col 0:string, col 1:string, col 
2:boolean
                   className: VectorMapJoinOperator
                   native: false
-                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer 
Join has keys IS true, Optimized Table and Supports Key Types IS true
+                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Optimized Table and Supports Key Types IS true
                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
-              outputColumnNames: _col0, _col1, _col2, _col3, _col5
+              outputColumnNames: _col0, _col1, _col3, _col4, _col5
               Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE 
Column stats: NONE
-              Filter Operator
-                Filter Vectorization:
-                    className: VectorFilterOperator
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string), _col4 
(type: bigint), _col5 (type: bigint), _col3 (type: boolean)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col5
+                Select Vectorization:
+                    className: VectorSelectOperator
                     native: true
-                    predicateExpression: FilterExprOrExpr(children: 
FilterLongColEqualLongScalar(col 2:bigint, val 0), FilterExprAndExpr(children: 
SelectColumnIsNull(col 4:boolean), SelectColumnIsNotNull(col 0:string), 
FilterLongColGreaterEqualLongColumn(col 3:bigint, col 2:bigint)))
-                predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not 
null and (_col3 >= _col2))) (type: boolean)
-                Statistics: Num rows: 366 Data size: 10110 Basic stats: 
COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Select Vectorization:
-                      className: VectorSelectOperator
+                    projectedOutputColumnNums: [0, 1, 3, 4, 2]
+                Statistics: Num rows: 550 Data size: 15193 Basic stats: 
COMPLETE Column stats: NONE
+                Filter Operator
+                  Filter Vectorization:
+                      className: VectorFilterOperator
                       native: true
-                      projectedOutputColumnNums: [0, 1]
+                      predicateExpression: FilterExprOrExpr(children: 
FilterLongColEqualLongScalar(col 3:bigint, val 0), FilterExprAndExpr(children: 
SelectColumnIsNull(col 2:boolean), SelectColumnIsNotNull(col 0:string), 
FilterLongColGreaterEqualLongColumn(col 4:bigint, col 3:bigint)))
+                  predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not 
null and (_col3 >= _col2))) (type: boolean)
                   Statistics: Num rows: 366 Data size: 10110 Basic stats: 
COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 366 Data size: 10110 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -278,16 +304,141 @@ STAGE PLANS:
                 output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
+  Stage: Stage-12
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:$INTNAME 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:$INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 
+                1 
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 
+                1 
+              Map Join Vectorization:
+                  bigTableValueExpressions: col 0:bigint, col 1:bigint
+                  className: VectorMapJoinOperator
+                  native: false
+                  nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Optimized Table and Supports Key Types IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
+              outputColumnNames: _col0, _col1, _col3, _col4, _col5
+              Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string), _col4 
(type: bigint), _col5 (type: bigint), _col3 (type: boolean)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col5
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 3, 4, 2]
+                Statistics: Num rows: 550 Data size: 15193 Basic stats: 
COMPLETE Column stats: NONE
+                Filter Operator
+                  Filter Vectorization:
+                      className: VectorFilterOperator
+                      native: true
+                      predicateExpression: FilterExprOrExpr(children: 
FilterLongColEqualLongScalar(col 3:bigint, val 0), FilterExprAndExpr(children: 
SelectColumnIsNull(col 2:boolean), SelectColumnIsNotNull(col 0:string), 
FilterLongColGreaterEqualLongColumn(col 4:bigint, col 3:bigint)))
+                  predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not 
null and (_col3 >= _col2))) (type: boolean)
+                  Statistics: Num rows: 366 Data size: 10110 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 366 Data size: 10110 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true
+          inputFormatFeatureSupport: []
+          featureSupportInUse: []
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Local Work:
+        Map Reduce Local Work
+
   Stage: Stage-2
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              key expressions: _col0 (type: string)
-              sort order: +
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 500 Data size: 13812 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: _col1 (type: string), _col2 (type: bigint), 
_col3 (type: bigint)
+              sort order: 
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: string), 
_col3 (type: boolean)
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+              value expressions: _col0 (type: bigint), _col1 (type: bigint)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col3, _col4, _col5
+          Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE 
Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col4 
(type: bigint), _col5 (type: bigint), _col3 (type: boolean)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col5
+            Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not null 
and (_col3 >= _col2))) (type: boolean)
+              Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 366 Data size: 10110 Basic stats: 
COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
           TableScan
             Reduce Output Operator
               key expressions: _col0 (type: string)
@@ -302,21 +453,14 @@ STAGE PLANS:
           keys:
             0 _col0 (type: string)
             1 _col0 (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col5
-          Statistics: Num rows: 550 Data size: 15193 Basic stats: COMPLETE 
Column stats: NONE
-          Filter Operator
-            predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not null 
and (_col3 >= _col2))) (type: boolean)
-            Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE 
Column stats: NONE
-            Select Operator
-              expressions: _col0 (type: string), _col1 (type: string)
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 366 Data size: 10110 Basic stats: COMPLETE 
Column stats: NONE
-              File Output Operator
-                compressed: false
-                table:
-                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0, _col1, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-5
     Map Reduce
@@ -335,27 +479,26 @@ STAGE PLANS:
                   projectedOutputColumnNums: [0]
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
               Group By Operator
+                aggregations: count(), count(key)
                 Group By Vectorization:
+                    aggregators: VectorUDAFCountStar(*) -> bigint, 
VectorUDAFCount(col 0:string) -> bigint
                     className: VectorGroupByOperator
                     groupByMode: HASH
-                    keyExpressions: col 0:string
                     native: false
                     vectorProcessingMode: HASH
-                    projectedOutputColumnNums: []
-                keys: key (type: string)
+                    projectedOutputColumnNums: [0, 1]
                 mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                  sort order: 
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkOperator
                       native: false
                       nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
                       nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: bigint)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -372,20 +515,16 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string)
+          aggregations: count(VALUE._col0), count(VALUE._col1)
           mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-          Select Operator
-            expressions: _col0 (type: string), true (type: boolean)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -393,7 +532,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[42][bigTable=?] in task 'Stage-8:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[53][bigTable=?] in task 'Stage-7:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Stage-6:MAPRED' is a cross 
product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 
'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select *
 from src
 where not key in
@@ -422,7 +563,9 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc
 POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-Warning: Map Join MAPJOIN[42][bigTable=?] in task 'Stage-8:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[53][bigTable=?] in task 'Stage-7:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Stage-6:MAPRED' is a cross 
product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 
'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select *
 from orcsrc
 where not key in
@@ -439,7 +582,9 @@ order by key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orcsrc
 #### A masked pattern was here ####
-Warning: Map Join MAPJOIN[42][bigTable=?] in task 'Stage-8:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[53][bigTable=?] in task 'Stage-7:MAPRED' is a cross 
product
+Warning: Map Join MAPJOIN[43][bigTable=?] in task 'Stage-6:MAPRED' is a cross 
product
+Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 
'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select *
 from orcsrc
 where not key in

[01/16] hive git commit: HIVE-20778: Join reordering may not be triggered if all joins in plan are created by decorrelation logic (Vineet Garg via Jesus Camacho Rodriguez)

Reply via email to