[01/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

sershe Thu, 13 Oct 2016 17:19:54 -0700

Repository: hive
Updated Branches:
  refs/heads/hive-14535 b9e815722 -> eacf9f9b6



http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index 22fe7cd..b297a7d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -1,7 +1,7 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -12,7 +12,7 @@ PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1,
 FROM   alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -37,42 +41,100 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Select Operator
                     expressions: ctinyint (type: tinyint)
                     outputColumnNames: ctinyint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(ctinyint), max(ctinyint), 
count(ctinyint), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinLong(col 0) -> tinyint, 
VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, 
VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: tinyint), _col1 (type: 
tinyint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinLong(col 0) -> tinyint, 
VectorUDAFMaxLong(col 1) -> tinyint, VectorUDAFCountMerge(col 2) -> bigint, 
VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col1 (type: tinyint), _col2 (type: 
bigint), _col3 (type: bigint)
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 
(type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -104,16 +166,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64    62      9173    12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(ctinyint) as c1
 FROM   alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(ctinyint) as c1
 FROM   alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -131,41 +197,99 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Select Operator
                     expressions: ctinyint (type: tinyint)
                     outputColumnNames: ctinyint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(ctinyint)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 0) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: bigint)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -191,7 +315,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -39856
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION 
 SELECT
   avg(ctinyint) as c1,
   variance(ctinyint),
@@ -204,7 +328,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION 
 SELECT
   avg(ctinyint) as c1,
   variance(ctinyint),
@@ -217,6 +341,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -248,7 +376,20 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 636 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:tinyint>), _col1 (type: 
struct<count:bigint,sum:double,variance:double>), _col2 (type: 
struct<count:bigint,sum:double,variance:double>), _col3 (type: 
struct<count:bigint,sum:double,variance:double>), _col4 (type: 
struct<count:bigint,sum:double,variance:double>), _col5 (type: 
struct<count:bigint,sum:double,variance:double>), _col6 (type: 
struct<count:bigint,sum:double,variance:double>), _col7 (type: 
struct<count:bigint,sum:double,variance:double>)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter 
expression for GROUPBY operator: Data type 
struct<count:bigint,sum:double,input:tinyint> of Column[VALUE._col0] not 
supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), 
var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), 
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -262,6 +403,13 @@ STAGE PLANS:
                   value expressions: _col1 (type: double), _col2 (type: 
double), _col3 (type: double), _col4 (type: double), _col5 (type: double), 
_col6 (type: double), _col7 (type: double)
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 
(type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), 
VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: 
double), VALUE._col6 (type: double)
@@ -310,7 +458,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.344925324321378     1158.3003004768184      1158.3003004768184      
1158.4265870337827      34.033811136527426      34.033811136527426      
34.033811136527426      34.03566639620536
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -318,7 +466,7 @@ SELECT MIN(cbigint) as c1,
 FROM   alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -326,6 +474,10 @@ SELECT MIN(cbigint) as c1,
 FROM   alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -343,42 +495,100 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Select Operator
                     expressions: cbigint (type: bigint)
                     outputColumnNames: cbigint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(cbigint), max(cbigint), 
count(cbigint), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinLong(col 3) -> bigint, 
VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, 
VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinLong(col 0) -> bigint, 
VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint, 
VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: bigint)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col1 (type: bigint), _col2 (type: 
bigint), _col3 (type: bigint)
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 
(type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -410,16 +620,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -2147311592    2145498388      9173    12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM   alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cbigint) as c1
 FROM   alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -437,41 +651,99 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Select Operator
                     expressions: cbigint (type: bigint)
                     outputColumnNames: cbigint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [3]
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(cbigint)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 3) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: bigint)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -497,7 +769,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1698460028409
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION 
 SELECT
   avg(cbigint) as c1,
   variance(cbigint),
@@ -510,7 +782,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION 
 SELECT
   avg(cbigint) as c1,
   variance(cbigint),
@@ -523,6 +795,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -554,7 +830,20 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 640 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:bigint>), _col1 (type: 
struct<count:bigint,sum:double,variance:double>), _col2 (type: 
struct<count:bigint,sum:double,variance:double>), _col3 (type: 
struct<count:bigint,sum:double,variance:double>), _col4 (type: 
struct<count:bigint,sum:double,variance:double>), _col5 (type: 
struct<count:bigint,sum:double,variance:double>), _col6 (type: 
struct<count:bigint,sum:double,variance:double>), _col7 (type: 
struct<count:bigint,sum:double,variance:double>)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter 
expression for GROUPBY operator: Data type 
struct<count:bigint,sum:double,input:bigint> of Column[VALUE._col0] not 
supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), 
var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), 
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -568,6 +857,13 @@ STAGE PLANS:
                   value expressions: _col1 (type: double), _col2 (type: 
double), _col3 (type: double), _col4 (type: double), _col5 (type: double), 
_col6 (type: double), _col7 (type: double)
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 
(type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), 
VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: 
double), VALUE._col6 (type: double)
@@ -616,7 +912,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1.8515862077935246E8  2.07689300543081907E18  2.07689300543081907E18  
2.07711944383088768E18  1.441142951074188E9     1.441142951074188E9     
1.441142951074188E9     1.4412215110214279E9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -624,7 +920,7 @@ SELECT MIN(cfloat) as c1,
 FROM   alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -632,6 +928,10 @@ SELECT MIN(cfloat) as c1,
 FROM   alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -649,42 +949,100 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Select Operator
                     expressions: cfloat (type: float)
                     outputColumnNames: cfloat
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(cfloat), max(cfloat), count(cfloat), 
count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinDouble(col 4) -> float, 
VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, 
VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: float), _col1 (type: 
float), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinDouble(col 0) -> float, 
VectorUDAFMaxDouble(col 1) -> float, VectorUDAFCountMerge(col 2) -> bigint, 
VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: float)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col1 (type: float), _col2 (type: 
bigint), _col3 (type: bigint)
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 
(type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -716,16 +1074,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64.0  79.553  9173    12288
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cfloat) as c1
 FROM   alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT SUM(cfloat) as c1
 FROM   alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -743,41 +1105,99 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Select Operator
                     expressions: cfloat (type: float)
                     outputColumnNames: cfloat
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4]
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(cfloat)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumDouble(col 4) -> double
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0]
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: double)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDouble(col 0) -> double
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: double)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: double)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -803,7 +1223,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -39479.635992884636
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION 
 SELECT
   avg(cfloat) as c1,
   variance(cfloat),
@@ -816,7 +1236,7 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION 
 SELECT
   avg(cfloat) as c1,
   variance(cfloat),
@@ -829,6 +1249,10 @@ SELECT
 FROM alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -860,7 +1284,20 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 636 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:float>), _col1 (type: 
struct<count:bigint,sum:double,variance:double>), _col2 (type: 
struct<count:bigint,sum:double,variance:double>), _col3 (type: 
struct<count:bigint,sum:double,variance:double>), _col4 (type: 
struct<count:bigint,sum:double,variance:double>), _col5 (type: 
struct<count:bigint,sum:double,variance:double>), _col6 (type: 
struct<count:bigint,sum:double,variance:double>), _col7 (type: 
struct<count:bigint,sum:double,variance:double>)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter 
expression for GROUPBY operator: Data type 
struct<count:bigint,sum:double,input:float> of Column[VALUE._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), 
var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), 
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -874,6 +1311,13 @@ STAGE PLANS:
                   value expressions: _col1 (type: double), _col2 (type: 
double), _col3 (type: double), _col4 (type: double), _col5 (type: double), 
_col6 (type: double), _col7 (type: double)
         Reducer 3 
             Execution mode: vectorized
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 
(type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), 
VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: 
double), VALUE._col6 (type: double)
@@ -923,7 +1367,7 @@ POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.303895780321011     1163.8972588604984      1163.8972588604984      
1164.0241556397025      34.115938487171924      34.115938487171924      
34.115938487171924      34.11779822379666
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -950,7 +1394,7 @@ WHERE  (((cstring2 LIKE '%b%')
             AND ((cboolean2 = 1)
                  AND (3569 = ctinyint))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -977,6 +1421,10 @@ WHERE  (((cstring2 LIKE '%b%')
             AND ((cboolean2 = 1)
                  AND (3569 = ctinyint))))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -993,15 +1441,33 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: 
FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, 
FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: 
CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, 
FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 
3) -> 13:double) -> boolean, FilterExprAndExpr(children: 
FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, 
FilterLongColEqualLongScalar(col 11, val 1) -> boolean, 
FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) 
-> boolean) -> boolean
                     predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint 
AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or 
((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = 
UDFToInteger(ctinyint)))) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cbigint (type: bigint), cfloat (type: 
float), ctinyint (type: tinyint)
                       outputColumnNames: cbigint, cfloat, ctinyint
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [3, 4, 0]
                       Statistics: Num rows: 12288 Data size: 377237 Basic 
stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: avg(cbigint), stddev_pop(cbigint), 
var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 3) -> 
struct<count:bigint,sum:double>, VectorUDAFStdPopLong(col 3) -> 
struct<count:bigint,sum:double,variance:double>, VectorUDAFVarSampLong(col 3) 
-> struct<count:bigint,sum:double,variance:double>, VectorUDAFCountStar(*) -> 
bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> 
tinyint
+                            className: VectorGroupByOperator
+                            vectorOutput: false
+                            native: false
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                            vectorOutputConditionsNotMet: Vector output of 
VectorUDAFAvgLong(col 3) -> struct<count:bigint,sum:double> output type STRUCT 
requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
                         Statistics: Num rows: 1 Data size: 260 Basic stats: 
COMPLETE Column stats: NONE
@@ -1010,7 +1476,20 @@ STAGE PLANS:
                           Statistics: Num rows: 1 Data size: 260 Basic stats: 
COMPLETE Column stats: NONE
                           value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:bigint>), _col1 (type: 
struct<count:bigint,sum:double,variance:double>), _col2 (type: 
struct<count:bigint,sum:double,variance:double>), _col3 (type: bigint), _col4 
(type: double), _col5 (type: tinyint)
             Execution mode: vectorized
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter 
expression for GROUPBY operator: Data type 
struct<count:bigint,sum:double,input:bigint> of Column[VALUE._col0] not 
supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), 
var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)

[01/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

Reply via email to