[07/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

mmccline Thu, 13 Oct 2016 03:50:44 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index ceaac4f..636463b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -19,10 +19,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.ts EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -48,10 +48,10 @@ POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00
 9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -87,10 +87,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00    9999-12-31 23:59:59.999999999   3652060 
23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -116,10 +116,10 @@ POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00
 9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.


http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 4092911..ae59b06 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -73,7 +73,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@alltypesorc_wrong
 POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE []
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -116,26 +120,61 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc_string
                   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: to_unix_timestamp(ctimestamp1) (type: 
bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), 
day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), 
weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), 
minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
+                        selectExpressions: VectorUDFUnixTimeStampTimestamp(col 
0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, 
VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, 
VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, 
VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, 
VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, 
VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, 
VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, 
VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long
                     Statistics: Num rows: 40 Data size: 84 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 40 Data size: 84 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), 
_col7 (type: int), _col8 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 
(type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 
(type: int), VALUE._col7 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -218,7 +257,7 @@ NULL        NULL    NULL    NULL    NULL    NULL    NULL    
NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(stimestamp1) AS c1,
   year(stimestamp1),
   month(stimestamp1),
@@ -231,7 +270,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(stimestamp1) AS c1,
   year(stimestamp1),
   month(stimestamp1),
@@ -244,6 +283,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -261,26 +304,61 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc_string
                   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: to_unix_timestamp(stimestamp1) (type: 
bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), 
day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), 
weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), 
minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
+                        selectExpressions: VectorUDFUnixTimeStampString(col 1) 
-> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, 
VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, 
VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, 
VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, 
VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, 
fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, 
fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, 
fieldStart 17, fieldLength 2) -> 10:long
                     Statistics: Num rows: 40 Data size: 84 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 40 Data size: 84 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), 
_col7 (type: int), _col8 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 
(type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 
(type: int), VALUE._col7 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -363,7 +441,7 @@ NULL        NULL    NULL    NULL    NULL    NULL    NULL    
NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1,
   year(ctimestamp1) = year(stimestamp1),
   month(ctimestamp1) = month(stimestamp1),
@@ -376,7 +454,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1,
   year(ctimestamp1) = year(stimestamp1),
   month(ctimestamp1) = month(stimestamp1),
@@ -389,6 +467,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -406,26 +488,61 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc_string
                   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: (to_unix_timestamp(ctimestamp1) = 
to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = 
year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) 
(type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), 
(dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), 
(weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), 
(hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = 
minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = 
second(stimestamp1)) (type: boolean)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12]
+                        selectExpressions: LongColEqualLongColumn(col 2, col 
3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, 
VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, 
LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, 
field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) 
-> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: 
VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 
1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, 
LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 
0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 
8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 
3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, 
VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 
8:long, LongColEqualLongColumn(col 2, col 3)(children: 
VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, 
VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, 
LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, 
field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, 
fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 
3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, 
VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 
11:long, LongColEqualLongColumn(col 2, col 3)(children: 
VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, 
VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long
                     Statistics: Num rows: 40 Data size: 84 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: boolean)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 40 Data size: 84 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: boolean), _col2 (type: 
boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), 
_col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 
(type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), 
VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: 
boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -511,7 +628,7 @@ NULL        NULL    NULL    NULL    NULL    NULL    NULL    
NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 PREHOOK: query: -- Wrong format. Should all be NULL.
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(stimestamp1) AS c1,
   year(stimestamp1),
   month(stimestamp1),
@@ -525,7 +642,7 @@ FROM alltypesorc_wrong
 ORDER BY c1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Wrong format. Should all be NULL.
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(stimestamp1) AS c1,
   year(stimestamp1),
   month(stimestamp1),
@@ -538,6 +655,10 @@ EXPLAIN SELECT
 FROM alltypesorc_wrong
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -555,26 +676,61 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc_wrong
                   Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0]
                   Select Operator
                     expressions: to_unix_timestamp(stimestamp1) (type: 
bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), 
day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), 
weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), 
minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9]
+                        selectExpressions: VectorUDFUnixTimeStampString(col 0) 
-> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, 
VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, 
VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, 
VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, 
VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, 
fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, 
fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, 
fieldStart 17, fieldLength 2) -> 9:long
                     Statistics: Num rows: 3 Data size: 294 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       sort order: +
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkOperator
+                          native: false
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                          nativeConditionsNotMet: Uniform Hash IS false
                       Statistics: Num rows: 3 Data size: 294 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: _col1 (type: int), _col2 (type: int), 
_col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), 
_col7 (type: int), _col8 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 
(type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 
(type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 
(type: int), VALUE._col7 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                 Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -620,20 +776,24 @@ POSTHOOK: Input: default@alltypesorc_wrong
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
 NULL   NULL    NULL    NULL    NULL    NULL    NULL    NULL    NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   min(ctimestamp1),
   max(ctimestamp1),
   count(ctimestamp1),
   count(*)
 FROM alltypesorc_string
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   min(ctimestamp1),
   max(ctimestamp1),
   count(ctimestamp1),
   count(*)
 FROM alltypesorc_string
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -651,31 +811,73 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc_string
                   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: ctimestamp1 (type: timestamp)
                     outputColumnNames: ctimestamp1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 40 Data size: 84 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(ctimestamp1), max(ctimestamp1), 
count(ctimestamp1), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinTimestamp(col 0) -> 
timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) 
-> bigint, VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 96 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: timestamp), _col1 
(type: timestamp), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, 
VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFCountMerge(col 2) -> 
bigint, VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -708,15 +910,19 @@ POSTHOOK: Input: default@alltypesorc_string
 #### A masked pattern was here ####
 NULL   NULL    0       40
 PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because 
they produce a double instead of a long (HIVE-8211)...
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION  SELECT
   round(sum(ctimestamp1), 3)
 FROM alltypesorc_string
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because 
they produce a double instead of a long (HIVE-8211)...
-EXPLAIN SELECT
+EXPLAIN VECTORIZATION EXPRESSION  SELECT
   round(sum(ctimestamp1), 3)
 FROM alltypesorc_string
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -749,20 +955,47 @@ STAGE PLANS:
                         value expressions: _col0 (type: double)
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Aggregation Function expression for 
GROUPBY operator: Vectorization of aggreation should have succeeded 
org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not 
implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1)
+                vectorized: false
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDouble(col 0) -> double
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                 Select Operator
                   expressions: round(_col0, 3) (type: double)
                   outputColumnNames: _col0
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [1]
+                      selectExpressions: RoundWithNumDigitsDoubleToDouble(col 
0, decimalPlaces 3) -> 1:double
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -788,7 +1021,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc_string
 #### A masked pattern was here ####
 NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   round(avg(ctimestamp1), 0),
   variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
   var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
@@ -799,7 +1032,7 @@ PREHOOK: query: EXPLAIN SELECT
   round(stddev_samp(ctimestamp1), 3)
 FROM alltypesorc_string
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   round(avg(ctimestamp1), 0),
   variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
   var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
@@ -810,6 +1043,10 @@ POSTHOOK: query: EXPLAIN SELECT
   round(stddev_samp(ctimestamp1), 3)
 FROM alltypesorc_string
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -827,12 +1064,26 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc_string
                   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: ctimestamp1 (type: timestamp)
                     outputColumnNames: ctimestamp1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 40 Data size: 84 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: avg(ctimestamp1), variance(ctimestamp1), 
var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), 
stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFAvgTimestamp(col 0) -> 
struct<count:bigint,sum:double>, VectorUDAFVarPopTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double>, VectorUDAFVarPopTimestamp(col 
0) -> struct<count:bigint,sum:double,variance:double>, 
VectorUDAFVarSampTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopTimestamp(col 
0) -> struct<count:bigint,sum:double,variance:double>, 
VectorUDAFStdPopTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopTimestamp(col 
0) -> struct<count:bigint,sum:double,variance:double>, 
VectorUDAFStdSampTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double>
+                          className: VectorGroupByOperator
+                          vectorOutput: false
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+                          vectorOutputConditionsNotMet: Vector output of 
VectorUDAFAvgTimestamp(col 0) -> struct<count:bigint,sum:double> output type 
STRUCT requires PRIMITIVE IS false, Vector output of 
VectorUDAFVarPopTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> 
struct<count
 :bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS 
false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7
                       Statistics: Num rows: 1 Data size: 672 Basic stats: 
COMPLETE Column stats: NONE
@@ -842,8 +1093,21 @@ STAGE PLANS:
                         value expressions: _col0 (type: 
struct<count:bigint,sum:double,input:timestamp>), _col1 (type: 
struct<count:bigint,sum:double,variance:double>), _col2 (type: 
struct<count:bigint,sum:double,variance:double>), _col3 (type: 
struct<count:bigint,sum:double,variance:double>), _col4 (type: 
struct<count:bigint,sum:double,variance:double>), _col5 (type: 
struct<count:bigint,sum:double,variance:double>), _col6 (type: 
struct<count:bigint,sum:double,variance:double>), _col7 (type: 
struct<count:bigint,sum:double,variance:double>)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: false
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter 
expression for GROUPBY operator: Data type 
struct<count:bigint,sum:double,input:timestamp> of Column[VALUE._col0] not 
supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), 
var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), 
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out
index 1e74446..2a142a0 100644
--- 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
 -- to timestamp
   cast (ctinyint as timestamp)
@@ -16,7 +16,7 @@ from alltypesorc
 -- limit output to a reasonably small number of rows
 where cbigint % 250 = 0
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
 -- to timestamp
   cast (ctinyint as timestamp)
@@ -34,22 +34,69 @@ from alltypesorc
 -- limit output to a reasonably small number of rows
 where cbigint % 250 = 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 1684250 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColEqualLongScalar(col 
12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> 
boolean
+                    predicate: ((cbigint % 250) = 0) (type: boolean)
+                    Statistics: Num rows: 6144 Data size: 842180 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: CAST( ctinyint AS TIMESTAMP) (type: 
timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS 
TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), 
CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) 
(type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( 
(cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), 
CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS 
TIMESTAMP) (type: timestamp)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [14, 16, 18, 20, 21, 22, 24, 
26, 8, 27, 29]
+                          selectExpressions: 
CastMillisecondsLongToTimestamp(col 0) -> 14:timestamp, 
CastMillisecondsLongToTimestamp(col 1) -> 16:timestamp, 
CastMillisecondsLongToTimestamp(col 2) -> 18:timestamp, 
CastMillisecondsLongToTimestamp(col 3) -> 20:timestamp, 
CastDoubleToTimestamp(col 4) -> 21:timestamp, CastDoubleToTimestamp(col 5) -> 
22:timestamp, CastMillisecondsLongToTimestamp(col 10) -> 24:timestamp, 
CastMillisecondsLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 
3, val 0) -> 12:long) -> 26:timestamp, VectorUDFAdaptor(CAST( cstring1 AS 
TIMESTAMP)) -> 27:Timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS 
TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 
28:string) -> 29:Timestamp
+                      Statistics: Num rows: 6144 Data size: 2703360 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 6144 Data size: 2703360 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Filter Operator
-            predicate: ((cbigint % 250) = 0) (type: boolean)
-            Select Operator
-              expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), 
CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) 
(type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat 
AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), 
CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS 
TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS 
TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: 
timestamp)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
-              ListSink
+        ListSink
 
 PREHOOK: query: select
 -- to timestamp
@@ -115,7 +162,7 @@ POSTHOOK: Input: default@alltypesorc
 1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL    1969-12-19 17:33:32.75  
1969-12-31 15:59:10     1969-12-31 14:00:04     NULL    1969-12-31 16:00:00     
1969-12-31 15:59:54.733 NULL    NULL
 1969-12-31 16:00:00.011        NULL    1969-12-30 22:03:04.018 1970-01-21 
12:50:53.75  1969-12-31 16:00:11     NULL    1969-12-31 16:00:00.001 1969-12-31 
16:00:00     1969-12-31 16:00:02.351 NULL    NULL
 1969-12-31 16:00:00.011        NULL    1969-12-27 18:49:09.583 1970-01-14 
22:35:27     1969-12-31 16:00:11     NULL    1969-12-31 16:00:00.001 1969-12-31 
16:00:00     1969-12-31 16:00:02.351 NULL    NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select
 -- to timestamp
   cast (ctinyint as timestamp)
@@ -133,7 +180,7 @@ from alltypesorc
 -- limit output to a reasonably small number of rows
 where cbigint % 250 = 0
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select
 -- to timestamp
   cast (ctinyint as timestamp)
@@ -151,22 +198,69 @@ from alltypesorc
 -- limit output to a reasonably small number of rows
 where cbigint % 250 = 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 1684250 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterLongColEqualLongScalar(col 
12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> 
boolean
+                    predicate: ((cbigint % 250) = 0) (type: boolean)
+                    Statistics: Num rows: 6144 Data size: 842180 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: CAST( ctinyint AS TIMESTAMP) (type: 
timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS 
TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), 
CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) 
(type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( 
(cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), 
CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS 
TIMESTAMP) (type: timestamp)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19, 
20, 8, 21, 23]
+                          selectExpressions: CastLongToTimestamp(col 0) -> 
13:timestamp, CastLongToTimestamp(col 1) -> 14:timestamp, 
CastLongToTimestamp(col 2) -> 15:timestamp, CastLongToTimestamp(col 3) -> 
16:timestamp, CastDoubleToTimestamp(col 4) -> 17:timestamp, 
CastDoubleToTimestamp(col 5) -> 18:timestamp, CastLongToTimestamp(col 10) -> 
19:timestamp, CastLongToTimestamp(col 12)(children: 
LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 20:timestamp, 
VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 21:Timestamp, 
VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: 
StringSubstrColStartLen(col 6, start 0, length 1) -> 22:string) -> 23:Timestamp
+                      Statistics: Num rows: 6144 Data size: 2703360 Basic 
stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 6144 Data size: 2703360 Basic 
stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: alltypesorc
-          Filter Operator
-            predicate: ((cbigint % 250) = 0) (type: boolean)
-            Select Operator
-              expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), 
CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) 
(type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat 
AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), 
CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS 
TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS 
TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: 
timestamp)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
-              ListSink
+        ListSink
 
 PREHOOK: query: select
 -- to timestamp

[07/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

Reply via email to