conversions (Matt McCline, reviewed by Teddy Choi)"

mmccline Wed, 15 Aug 2018 17:20:44 -0700

Repository: hive
Updated Branches:
  refs/heads/master 489b37a54 -> 142367d96



http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query87.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query87.q.out 
b/ql/src/test/results/clientpositive/perf/spark/query87.q.out
index e7329a3..8ac6dce 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query87.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query87.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
 select count(*) 
 from ((select distinct c_last_name, c_first_name, d_date
        from store_sales, date_dim, customer
@@ -19,7 +19,7 @@ from ((select distinct c_last_name, c_first_name, d_date
          and d_month_seq between 1212 and 1212+11)
 ) cool_cust
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
 select count(*) 
 from ((select distinct c_last_name, c_first_name, d_date
        from store_sales, date_dim, customer
@@ -40,10 +40,6 @@ from ((select distinct c_last_name, c_first_name, d_date
          and d_month_seq between 1212 and 1212+11)
 ) cool_cust
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
@@ -62,40 +58,18 @@ STAGE PLANS:
                   alias: date_dim
                   filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk 
is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: 
COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
                   Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColumnBetween(col 3:int, left 1212, right 1223), 
SelectColumnIsNotNull(col 0:int))
                     predicate: (d_date_sk is not null and d_month_seq BETWEEN 
1212 AND 1223) (type: boolean)
                     Statistics: Num rows: 8116 Data size: 9081804 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int), d_date (type: string)
                       outputColumnNames: _col0, _col1
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 2]
                       Statistics: Num rows: 8116 Data size: 9081804 Basic 
stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
-                        Spark Hash Table Sink Vectorization:
-                            className: VectorSparkHashTableSinkOperator
-                            native: true
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: [DECIMAL_64]
-                featureSupportInUse: [DECIMAL_64]
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Local Work:
               Map Reduce Local Work
 
@@ -109,40 +83,18 @@ STAGE PLANS:
                   alias: date_dim
                   filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk 
is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: 
COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
                   Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColumnBetween(col 3:int, left 1212, right 1223), 
SelectColumnIsNotNull(col 0:int))
                     predicate: (d_date_sk is not null and d_month_seq BETWEEN 
1212 AND 1223) (type: boolean)
                     Statistics: Num rows: 8116 Data size: 9081804 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int), d_date (type: string)
                       outputColumnNames: _col0, _col1
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 2]
                       Statistics: Num rows: 8116 Data size: 9081804 Basic 
stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
-                        Spark Hash Table Sink Vectorization:
-                            className: VectorSparkHashTableSinkOperator
-                            native: true
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: [DECIMAL_64]
-                featureSupportInUse: [DECIMAL_64]
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Local Work:
               Map Reduce Local Work
 
@@ -156,40 +108,18 @@ STAGE PLANS:
                   alias: date_dim
                   filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk 
is not null) (type: boolean)
                   Statistics: Num rows: 73049 Data size: 81741831 Basic stats: 
COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
                   Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColumnBetween(col 3:int, left 1212, right 1223), 
SelectColumnIsNotNull(col 0:int))
                     predicate: (d_date_sk is not null and d_month_seq BETWEEN 
1212 AND 1223) (type: boolean)
                     Statistics: Num rows: 8116 Data size: 9081804 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: d_date_sk (type: int), d_date (type: string)
                       outputColumnNames: _col0, _col1
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 2]
                       Statistics: Num rows: 8116 Data size: 9081804 Basic 
stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
-                        Spark Hash Table Sink Vectorization:
-                            className: VectorSparkHashTableSinkOperator
-                            native: true
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
             Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: [DECIMAL_64]
-                featureSupportInUse: [DECIMAL_64]
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Local Work:
               Map Reduce Local Work
 
@@ -213,22 +143,12 @@ STAGE PLANS:
                   alias: store_sales
                   filterExpr: (ss_sold_date_sk is not null and ss_customer_sk 
is not null) (type: boolean)
                   Statistics: Num rows: 575995635 Data size: 50814502088 Basic 
stats: COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
                   Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int))
                     predicate: (ss_customer_sk is not null and ss_sold_date_sk 
is not null) (type: boolean)
                     Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ss_sold_date_sk (type: int), ss_customer_sk 
(type: int)
                       outputColumnNames: _col0, _col1
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 3]
                       Statistics: Num rows: 575995635 Data size: 50814502088 
Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -236,10 +156,6 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        Map Join Vectorization:
-                            className: VectorMapJoinInnerLongOperator
-                            native: true
-                            nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
                         outputColumnNames: _col1, _col3
                         input vertices:
                           1 Map 7
@@ -248,22 +164,9 @@ STAGE PLANS:
                           key expressions: _col1 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col1 (type: int)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkLongOperator
-                              native: true
-                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 633595212 Data size: 
55895953508 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col3 (type: string)
             Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: [DECIMAL_64]
-                featureSupportInUse: [DECIMAL_64]
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Local Work:
               Map Reduce Local Work
         Map 13 
@@ -272,65 +175,32 @@ STAGE PLANS:
                   alias: customer
                   filterExpr: c_customer_sk is not null (type: boolean)
                   Statistics: Num rows: 80000000 Data size: 68801615852 Basic 
stats: COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
                   Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: c_customer_sk is not null (type: boolean)
                     Statistics: Num rows: 80000000 Data size: 68801615852 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: c_customer_sk (type: int), c_first_name 
(type: string), c_last_name (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 8, 9]
                       Statistics: Num rows: 80000000 Data size: 68801615852 
Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkLongOperator
-                            native: true
-                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 80000000 Data size: 68801615852 
Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: 
string)
             Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: [DECIMAL_64]
-                featureSupportInUse: [DECIMAL_64]
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
         Map 14 
             Map Operator Tree:
                 TableScan
                   alias: web_sales
                   filterExpr: (ws_sold_date_sk is not null and 
ws_bill_customer_sk is not null) (type: boolean)
                   Statistics: Num rows: 144002668 Data size: 19580198212 Basic 
stats: COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
                   Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int))
                     predicate: (ws_bill_customer_sk is not null and 
ws_sold_date_sk is not null) (type: boolean)
                     Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ws_sold_date_sk (type: int), 
ws_bill_customer_sk (type: int)
                       outputColumnNames: _col0, _col1
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 4]
                       Statistics: Num rows: 144002668 Data size: 19580198212 
Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -338,10 +208,6 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        Map Join Vectorization:
-                            className: VectorMapJoinInnerLongOperator
-                            native: true
-                            nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
                         outputColumnNames: _col1, _col3
                         input vertices:
                           1 Map 17
@@ -350,22 +216,9 @@ STAGE PLANS:
                           key expressions: _col1 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col1 (type: int)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkLongOperator
-                              native: true
-                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 158402938 Data size: 
21538218500 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col3 (type: string)
             Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: [DECIMAL_64]
-                featureSupportInUse: [DECIMAL_64]
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Local Work:
               Map Reduce Local Work
         Map 9 
@@ -374,22 +227,12 @@ STAGE PLANS:
                   alias: catalog_sales
                   filterExpr: (cs_sold_date_sk is not null and 
cs_bill_customer_sk is not null) (type: boolean)
                   Statistics: Num rows: 287989836 Data size: 38999608952 Basic 
stats: COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
                   Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int))
                     predicate: (cs_bill_customer_sk is not null and 
cs_sold_date_sk is not null) (type: boolean)
                     Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cs_sold_date_sk (type: int), 
cs_bill_customer_sk (type: int)
                       outputColumnNames: _col0, _col1
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 3]
                       Statistics: Num rows: 287989836 Data size: 38999608952 
Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
@@ -397,10 +240,6 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        Map Join Vectorization:
-                            className: VectorMapJoinInnerLongOperator
-                            native: true
-                            nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
                         outputColumnNames: _col1, _col3
                         input vertices:
                           1 Map 12
@@ -409,30 +248,12 @@ STAGE PLANS:
                           key expressions: _col1 (type: int)
                           sort order: +
                           Map-reduce partition columns: _col1 (type: int)
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkLongOperator
-                              native: true
-                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 316788826 Data size: 
42899570777 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col3 (type: string)
             Execution mode: vectorized
-            Map Vectorization:
-                enabled: true
-                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: [DECIMAL_64]
-                featureSupportInUse: [DECIMAL_64]
-                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Local Work:
               Map Reduce Local Work
         Reducer 10 
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: Tagging not supported
-                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -454,21 +275,8 @@ STAGE PLANS:
                     Statistics: Num rows: 348467716 Data size: 47189528877 
Basic stats: COMPLETE Column stats: NONE
         Reducer 11 
             Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Reduce Operator Tree:
               Group By Operator
-                Group By Vectorization:
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:string, col 1:string, col 2:string
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: []
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
@@ -476,21 +284,9 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: string), 
_col2 (type: string)
                   outputColumnNames: _col0, _col1, _col2
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [1, 0, 2]
                   Statistics: Num rows: 174233858 Data size: 23594764438 Basic 
stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
-                    Group By Vectorization:
-                        aggregators: VectorUDAFCountStar(*) -> bigint
-                        className: VectorGroupByOperator
-                        groupByMode: COMPLETE
-                        keyExpressions: col 0:string, col 1:string, col 
2:string
-                        native: false
-                        vectorProcessingMode: STREAMING
-                        projectedOutputColumnNums: [0]
                     keys: _col1 (type: string), _col0 (type: string), _col2 
(type: string)
                     mode: complete
                     outputColumnNames: _col0, _col1, _col2, _col3
@@ -498,31 +294,13 @@ STAGE PLANS:
                     Select Operator
                       expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string), 1L (type: bigint), _col3 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 1, 2, 4, 3]
-                          selectExpressions: ConstantVectorExpression(val 1) 
-> 4:bigint
                       Statistics: Num rows: 87116929 Data size: 11797382219 
Basic stats: COMPLETE Column stats: NONE
                       Select Operator
                         expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: 
bigint)
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumnNums: [0, 1, 2, 3, 5]
-                            selectExpressions: LongColMultiplyLongColumn(col 
4:bigint, col 3:bigint) -> 5:bigint
                         Statistics: Num rows: 261355616 Data size: 27168769766 
Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: sum(_col3), sum(_col4)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFSumLong(col 3:bigint) -> 
bigint, VectorUDAFSumLong(col 5:bigint) -> bigint
-                              className: VectorGroupByOperator
-                              groupByMode: HASH
-                              keyExpressions: col 0:string, col 1:string, col 
2:string
-                              native: false
-                              vectorProcessingMode: HASH
-                              projectedOutputColumnNums: [0, 1]
                           keys: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -531,18 +309,9 @@ STAGE PLANS:
                             key expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
                             sort order: +++
                             Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: string)
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkMultiKeyOperator
-                                native: true
-                                nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             Statistics: Num rows: 261355616 Data size: 
27168769766 Basic stats: COMPLETE Column stats: NONE
                             value expressions: _col3 (type: bigint), _col4 
(type: bigint)
         Reducer 15 
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: Tagging not supported
-                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -564,21 +333,8 @@ STAGE PLANS:
                     Statistics: Num rows: 174243235 Data size: 23692040863 
Basic stats: COMPLETE Column stats: NONE
         Reducer 16 
             Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Reduce Operator Tree:
               Group By Operator
-                Group By Vectorization:
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:string, col 1:string, col 2:string
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: []
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
@@ -586,21 +342,9 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: string), 
_col2 (type: string)
                   outputColumnNames: _col0, _col1, _col2
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [1, 0, 2]
                   Statistics: Num rows: 87121617 Data size: 11846020363 Basic 
stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
-                    Group By Vectorization:
-                        aggregators: VectorUDAFCountStar(*) -> bigint
-                        className: VectorGroupByOperator
-                        groupByMode: COMPLETE
-                        keyExpressions: col 0:string, col 1:string, col 
2:string
-                        native: false
-                        vectorProcessingMode: STREAMING
-                        projectedOutputColumnNums: [0]
                     keys: _col1 (type: string), _col0 (type: string), _col2 
(type: string)
                     mode: complete
                     outputColumnNames: _col0, _col1, _col2, _col3
@@ -608,31 +352,13 @@ STAGE PLANS:
                     Select Operator
                       expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string), 1L (type: bigint), _col3 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 1, 2, 4, 3]
-                          selectExpressions: ConstantVectorExpression(val 1) 
-> 4:bigint
                       Statistics: Num rows: 43560808 Data size: 5923010113 
Basic stats: COMPLETE Column stats: NONE
                       Select Operator
                         expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: 
bigint)
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumnNums: [0, 1, 2, 3, 5]
-                            selectExpressions: LongColMultiplyLongColumn(col 
4:bigint, col 3:bigint) -> 5:bigint
                         Statistics: Num rows: 54450625 Data size: 7055042151 
Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: sum(_col3), sum(_col4)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFSumLong(col 3:bigint) -> 
bigint, VectorUDAFSumLong(col 5:bigint) -> bigint
-                              className: VectorGroupByOperator
-                              groupByMode: HASH
-                              keyExpressions: col 0:string, col 1:string, col 
2:string
-                              native: false
-                              vectorProcessingMode: HASH
-                              projectedOutputColumnNums: [0, 1]
                           keys: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -641,18 +367,9 @@ STAGE PLANS:
                             key expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
                             sort order: +++
                             Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: string)
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkMultiKeyOperator
-                                native: true
-                                nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             Statistics: Num rows: 54450625 Data size: 
7055042151 Basic stats: COMPLETE Column stats: NONE
                             value expressions: _col3 (type: bigint), _col4 
(type: bigint)
         Reducer 2 
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: Tagging not supported
-                vectorized: false
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -674,21 +391,8 @@ STAGE PLANS:
                     Statistics: Num rows: 696954748 Data size: 61485550191 
Basic stats: COMPLETE Column stats: NONE
         Reducer 3 
             Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Reduce Operator Tree:
               Group By Operator
-                Group By Vectorization:
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:string, col 1:string, col 2:string
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: []
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
@@ -696,21 +400,9 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: string), 
_col2 (type: string)
                   outputColumnNames: _col0, _col1, _col2
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [1, 0, 2]
                   Statistics: Num rows: 348477374 Data size: 30742775095 Basic 
stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count()
-                    Group By Vectorization:
-                        aggregators: VectorUDAFCountStar(*) -> bigint
-                        className: VectorGroupByOperator
-                        groupByMode: COMPLETE
-                        keyExpressions: col 0:string, col 1:string, col 
2:string
-                        native: false
-                        vectorProcessingMode: STREAMING
-                        projectedOutputColumnNums: [0]
                     keys: _col1 (type: string), _col0 (type: string), _col2 
(type: string)
                     mode: complete
                     outputColumnNames: _col0, _col1, _col2, _col3
@@ -718,31 +410,13 @@ STAGE PLANS:
                     Select Operator
                       expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string), 2L (type: bigint), _col3 (type: bigint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 1, 2, 4, 3]
-                          selectExpressions: ConstantVectorExpression(val 2) 
-> 4:bigint
                       Statistics: Num rows: 174238687 Data size: 15371387547 
Basic stats: COMPLETE Column stats: NONE
                       Select Operator
                         expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: 
bigint)
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumnNums: [0, 1, 2, 3, 5]
-                            selectExpressions: LongColMultiplyLongColumn(col 
4:bigint, col 3:bigint) -> 5:bigint
                         Statistics: Num rows: 261355616 Data size: 27168769766 
Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: sum(_col3), sum(_col4)
-                          Group By Vectorization:
-                              aggregators: VectorUDAFSumLong(col 3:bigint) -> 
bigint, VectorUDAFSumLong(col 5:bigint) -> bigint
-                              className: VectorGroupByOperator
-                              groupByMode: HASH
-                              keyExpressions: col 0:string, col 1:string, col 
2:string
-                              native: false
-                              vectorProcessingMode: HASH
-                              projectedOutputColumnNums: [0, 1]
                           keys: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -751,60 +425,26 @@ STAGE PLANS:
                             key expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
                             sort order: +++
                             Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: string)
-                            Reduce Sink Vectorization:
-                                className: VectorReduceSinkMultiKeyOperator
-                                native: true
-                                nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             Statistics: Num rows: 261355616 Data size: 
27168769766 Basic stats: COMPLETE Column stats: NONE
                             value expressions: _col3 (type: bigint), _col4 
(type: bigint)
         Reducer 4 
             Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), sum(VALUE._col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, 
VectorUDAFSumLong(col 4:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:string, col 1:string, col 2:string
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 130677808 Data size: 13584384883 Basic 
stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  Filter Vectorization:
-                      className: VectorFilterOperator
-                      native: true
-                      predicateExpression: FilterExprAndExpr(children: 
FilterLongColGreaterLongScalar(col 3:bigint, val 0), 
FilterLongColEqualLongColumn(col 5:bigint, col 4:bigint)(children: 
LongColMultiplyLongScalar(col 3:bigint, val 2) -> 5:bigint))
                   predicate: (((_col3 * 2) = _col4) and (_col3 > 0L)) (type: 
boolean)
                   Statistics: Num rows: 21779634 Data size: 2264064077 Basic 
stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                     outputColumnNames: _col0, _col1, _col2
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [0, 1, 2]
                     Statistics: Num rows: 21779634 Data size: 2264064077 Basic 
stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
-                      Group By Vectorization:
-                          aggregators: VectorUDAFCountStar(*) -> bigint
-                          className: VectorGroupByOperator
-                          groupByMode: COMPLETE
-                          keyExpressions: col 0:string, col 1:string, col 
2:string
-                          native: false
-                          vectorProcessingMode: STREAMING
-                          projectedOutputColumnNums: [0]
                       keys: _col0 (type: string), _col1 (type: string), _col2 
(type: string)
                       mode: complete
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -812,31 +452,13 @@ STAGE PLANS:
                       Select Operator
                         expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), 2L (type: bigint), _col3 (type: bigint)
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Select Vectorization:
-                            className: VectorSelectOperator
-                            native: true
-                            projectedOutputColumnNums: [0, 1, 2, 4, 3]
-                            selectExpressions: ConstantVectorExpression(val 2) 
-> 4:bigint
                         Statistics: Num rows: 10889817 Data size: 1132032038 
Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: 
bigint)
                           outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                          Select Vectorization:
-                              className: VectorSelectOperator
-                              native: true
-                              projectedOutputColumnNums: [0, 1, 2, 3, 5]
-                              selectExpressions: LongColMultiplyLongColumn(col 
4:bigint, col 3:bigint) -> 5:bigint
                           Statistics: Num rows: 54450625 Data size: 7055042151 
Basic stats: COMPLETE Column stats: NONE
                           Group By Operator
                             aggregations: sum(_col3), sum(_col4)
-                            Group By Vectorization:
-                                aggregators: VectorUDAFSumLong(col 3:bigint) 
-> bigint, VectorUDAFSumLong(col 5:bigint) -> bigint
-                                className: VectorGroupByOperator
-                                groupByMode: HASH
-                                keyExpressions: col 0:string, col 1:string, 
col 2:string
-                                native: false
-                                vectorProcessingMode: HASH
-                                projectedOutputColumnNums: [0, 1]
                             keys: _col0 (type: string), _col1 (type: string), 
_col2 (type: string)
                             mode: hash
                             outputColumnNames: _col0, _col1, _col2, _col3, 
_col4
@@ -845,31 +467,13 @@ STAGE PLANS:
                               key expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: string)
                               sort order: +++
                               Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: string)
-                              Reduce Sink Vectorization:
-                                  className: VectorReduceSinkMultiKeyOperator
-                                  native: true
-                                  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                               Statistics: Num rows: 54450625 Data size: 
7055042151 Basic stats: COMPLETE Column stats: NONE
                               value expressions: _col3 (type: bigint), _col4 
(type: bigint)
         Reducer 5 
             Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), sum(VALUE._col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, 
VectorUDAFSumLong(col 4:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    keyExpressions: col 0:string, col 1:string, col 2:string
-                    native: false
-                    vectorProcessingMode: MERGE_PARTIAL
-                    projectedOutputColumnNums: [0, 1]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), 
KEY._col2 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -877,70 +481,31 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col3 (type: bigint), _col4 (type: bigint)
                   outputColumnNames: _col3, _col4
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [3, 4]
                   Statistics: Num rows: 27225312 Data size: 3527521010 Basic 
stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: FilterExprAndExpr(children: 
FilterLongColGreaterLongScalar(col 3:bigint, val 0), 
FilterLongColEqualLongColumn(col 5:bigint, col 4:bigint)(children: 
LongColMultiplyLongScalar(col 3:bigint, val 2) -> 5:bigint))
                     predicate: (((_col3 * 2) = _col4) and (_col3 > 0L)) (type: 
boolean)
                     Statistics: Num rows: 4537552 Data size: 587920168 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: []
                       Statistics: Num rows: 4537552 Data size: 587920168 Basic 
stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
-                        Group By Vectorization:
-                            aggregators: VectorUDAFCountStar(*) -> bigint
-                            className: VectorGroupByOperator
-                            groupByMode: HASH
-                            native: false
-                            vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0]
                         mode: hash
                         outputColumnNames: _col0
                         Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
                         Reduce Output Operator
                           sort order: 
-                          Reduce Sink Vectorization:
-                              className: VectorReduceSinkEmptyKeyOperator
-                              native: true
-                              nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
                           value expressions: _col0 (type: bigint)
         Reducer 6 
             Execution mode: vectorized
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: MERGEPARTIAL
-                    native: false
-                    vectorProcessingMode: GLOBAL
-                    projectedOutputColumnNums: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
-                  File Sink Vectorization:
-                      className: VectorFileSinkOperator
-                      native: false
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

[01/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"

Reply via email to