[11/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

sershe Mon, 17 Oct 2016 13:42:07 -0700

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 6c6c6d6..14606ed 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@vsmb_bucket_txt
 POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
 POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, 
comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -117,33 +121,71 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: value (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: b
                   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: value (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -187,12 +229,16 @@ POSTHOOK: Input: default@vsmb_bucket_2
 528534767      cvLH6Eat2yFsyy7p        528534767       cvLH6Eat2yFsyy7p
 528534767      cvLH6Eat2yFsyy7p        528534767       cvLH6Eat2yFsyy7p
 528534767      cvLH6Eat2yFsyy7p        528534767       cvLH6Eat2yFsyy7p
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = 
b.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = 
b.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -210,17 +256,36 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: value (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -237,6 +302,10 @@ STAGE PLANS:
                       value expressions: value (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: 
hive.vectorized.use.row.serde.deserialize IS false
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -285,7 +354,7 @@ PREHOOK: query: -- RC file does not yet provide the 
vectorized CommonRCFileforma
 -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on 
a.key = b.key;
 -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on 
a.key = b.key;
 
-explain
+explain vectorization expression
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key 
= b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- RC file does not yet provide the vectorized 
CommonRCFileformat out-of-the-box
@@ -293,9 +362,13 @@ POSTHOOK: query: -- RC file does not yet provide the 
vectorized CommonRCFileform
 -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on 
a.key = b.key;
 -- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on 
a.key = b.key;
 
-explain
+explain vectorization expression
 select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key 
= b.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -313,17 +386,36 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: key (type: int)
                       sort order: +
                       Map-reduce partition columns: key (type: int)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkLongOperator
+                          native: true
+                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
                       value expressions: value (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -340,6 +432,10 @@ STAGE PLANS:
                       value expressions: value (type: string)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:


http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index 6e13369..127c2c3 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select 
   csmallint,
   case 
@@ -16,7 +16,7 @@ where csmallint = 418
 or csmallint = 12205
 or csmallint = 10583
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select 
   csmallint,
   case 
@@ -34,6 +34,10 @@ where csmallint = 418
 or csmallint = 12205
 or csmallint = 10583
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -48,15 +52,30 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 36700 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: 
FilterLongColEqualLongScalar(col 1, val 418) -> boolean, 
FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, 
FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean
                     predicate: ((csmallint = 418) or (csmallint = 12205) or 
(csmallint = 10583)) (type: boolean)
                     Statistics: Num rows: 6 Data size: 20 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: csmallint (type: smallint), CASE WHEN 
((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') 
END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 
12205)) THEN ('b') ELSE ('c') END (type: string)
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1, 14, 15]
+                          selectExpressions: VectorUDFAdaptor(CASE WHEN 
((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') 
END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, 
LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, 
VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 
12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 
418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 
15:String
                       Statistics: Num rows: 6 Data size: 2228 Basic stats: 
COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 6 Data size: 2228 Basic stats: 
COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -64,6 +83,14 @@ STAGE PLANS:
                             serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator
@@ -113,7 +140,7 @@ POSTHOOK: Input: default@alltypesorc
 10583  c       c
 418    a       a
 12205  b       b
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select 
   csmallint,
   case 
@@ -131,7 +158,7 @@ where csmallint = 418
 or csmallint = 12205
 or csmallint = 10583
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select 
   csmallint,
   case 
@@ -149,6 +176,10 @@ where csmallint = 418
 or csmallint = 12205
 or csmallint = 10583
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -163,15 +194,30 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   Statistics: Num rows: 12288 Data size: 36700 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
                   Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: 
FilterLongColEqualLongScalar(col 1, val 418) -> boolean, 
FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, 
FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean
                     predicate: ((csmallint = 418) or (csmallint = 12205) or 
(csmallint = 10583)) (type: boolean)
                     Statistics: Num rows: 6 Data size: 20 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: csmallint (type: smallint), CASE WHEN 
((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE 
(null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN 
((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string)
                       outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [1, 14, 15]
+                          selectExpressions: VectorUDFAdaptor(CASE WHEN 
((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE 
(null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, 
LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, 
VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 
12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 
418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 
15:String
                       Statistics: Num rows: 6 Data size: 2228 Basic stats: 
COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
                         Statistics: Num rows: 6 Data size: 2228 Basic stats: 
COMPLETE Column stats: COMPLETE
                         table:
                             input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -179,6 +225,14 @@ STAGE PLANS:
                             serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
index a95702d..0fb8552 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
@@ -3,7 +3,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
 -- Currently, vectorization is not supported in fetch task 
(hive.fetch.task.conversion=none)
 -- Test type casting in vectorized mode to verify end-to-end functionality.
 
-explain 
+explain vectorization 
 select 
 -- to boolean
    cast (ctinyint as boolean)
@@ -82,7 +82,7 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- Currently, vectorization is not supported in fetch task 
(hive.fetch.task.conversion=none)
 -- Test type casting in vectorized mode to verify end-to-end functionality.
 
-explain 
+explain vectorization 
 select 
 -- to boolean
    cast (ctinyint as boolean)
@@ -156,6 +156,10 @@ from alltypesorc
 -- limit output to a reasonably small number of rows
 where cbigint % 250 = 0
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -186,6 +190,14 @@ STAGE PLANS:
                             serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
index 1f70a01..855a50f 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
@@ -82,20 +82,24 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@household_demographics
 POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
-PREHOOK: query: explain 
+PREHOOK: query: explain vectorization 
 select store.s_city, ss_net_profit
 from store_sales
 JOIN store ON store_sales.ss_store_sk = store.s_store_sk
 JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk
 limit 100
 PREHOOK: type: QUERY
-POSTHOOK: query: explain 
+POSTHOOK: query: explain vectorization 
 select store.s_city, ss_net_profit
 from store_sales
 JOIN store ON store_sales.ss_store_sk = store.s_store_sk
 JOIN household_demographics ON store_sales.ss_hdemo_sk = 
household_demographics.hd_demo_sk
 limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -128,6 +132,14 @@ STAGE PLANS:
                         value expressions: _col1 (type: int), _col2 (type: 
double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -176,6 +188,14 @@ STAGE PLANS:
                                     serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -195,6 +215,14 @@ STAGE PLANS:
                         Statistics: Num rows: 6075 Data size: 24300 Basic 
stats: COMPLETE Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
index f45e730..e2999a5 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
@@ -203,7 +203,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
 2010-10-31     2010-10-31 07:00:00
 2010-10-31     2010-10-31 07:00:00
 2010-10-31     2010-10-31 07:00:00
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(fl_time),
   year(fl_time),
   month(fl_time),
@@ -218,7 +218,7 @@ PREHOOK: query: EXPLAIN SELECT
   datediff(fl_time, "2000-01-01")
 FROM date_udf_flight_orc
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(fl_time),
   year(fl_time),
   month(fl_time),
@@ -233,20 +233,62 @@ POSTHOOK: query: EXPLAIN SELECT
   datediff(fl_time, "2000-01-01")
 FROM date_udf_flight_orc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: date_udf_flight_orc
+                  Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: to_unix_timestamp(fl_time) (type: bigint), 
year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: 
int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), 
weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), 
to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), 
date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13]
+                        selectExpressions: VectorUDFUnixTimeStampTimestamp(col 
1) -> 2:long, VectorUDFYearTimestamp(col 1, field YEAR) -> 3:long, 
VectorUDFMonthTimestamp(col 1, field MONTH) -> 4:long, 
VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 5:long, 
VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 6:long, 
VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 7:long, 
VectorUDFWeekOfYearTimestamp(col 1, field WEEK_OF_YEAR) -> 8:long, 
CastTimestampToDate(col 1) -> 9:date, VectorUDFDateTimestamp(col 1) -> 10:date, 
VectorUDFDateAddColScalar(col 1, val 2) -> 11:date, 
VectorUDFDateSubColScalar(col 1, val 2) -> 12:date, 
VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 13:long
+                    Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: date_udf_flight_orc
-          Select Operator
-            expressions: to_unix_timestamp(fl_time) (type: bigint), 
year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: 
int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), 
weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), 
to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), 
date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11
-            ListSink
+        ListSink
 
 PREHOOK: query: SELECT
   to_unix_timestamp(fl_time),
@@ -419,7 +461,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
 1288533600     2010    10      31      31      1       43      2010-10-31      
2010-10-31      2010-11-02      2010-10-29      3956
 1288533600     2010    10      31      31      1       43      2010-10-31      
2010-10-31      2010-11-02      2010-10-29      3956
 1288533600     2010    10      31      31      1       43      2010-10-31      
2010-10-31      2010-11-02      2010-10-29      3956
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(fl_date),
   year(fl_date),
   month(fl_date),
@@ -434,7 +476,7 @@ PREHOOK: query: EXPLAIN SELECT
   datediff(fl_date, "2000-01-01")
 FROM date_udf_flight_orc
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(fl_date),
   year(fl_date),
   month(fl_date),
@@ -449,20 +491,62 @@ POSTHOOK: query: EXPLAIN SELECT
   datediff(fl_date, "2000-01-01")
 FROM date_udf_flight_orc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: date_udf_flight_orc
+                  Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: to_unix_timestamp(fl_date) (type: bigint), 
year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: 
int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), 
weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: 
date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), 
datediff(fl_date, '2000-01-01') (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 0, 9, 
10, 11, 12]
+                        selectExpressions: VectorUDFUnixTimeStampDate(col 0) 
-> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long, 
VectorUDFMonthDate(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthDate(col 
0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthDate(col 0, field 
DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 
7:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 8:long, 
VectorUDFDateLong(col 0) -> 9:date, VectorUDFDateAddColScalar(col 0, val 2) -> 
10:date, VectorUDFDateSubColScalar(col 0, val 2) -> 11:date, 
VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 12:long
+                    Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: date_udf_flight_orc
-          Select Operator
-            expressions: to_unix_timestamp(fl_date) (type: bigint), 
year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: 
int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), 
weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: 
date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), 
datediff(fl_date, '2000-01-01') (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11
-            ListSink
+        ListSink
 
 PREHOOK: query: SELECT
   to_unix_timestamp(fl_date),
@@ -635,7 +719,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
 1288508400     2010    10      31      31      1       43      2010-10-31      
2010-10-31      2010-11-02      2010-10-29      3956
 1288508400     2010    10      31      31      1       43      2010-10-31      
2010-10-31      2010-11-02      2010-10-29      3956
 1288508400     2010    10      31      31      1       43      2010-10-31      
2010-10-31      2010-11-02      2010-10-29      3956
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   year(fl_time) = year(fl_date),
   month(fl_time) = month(fl_date),
   day(fl_time) = day(fl_date),
@@ -649,7 +733,7 @@ PREHOOK: query: EXPLAIN SELECT
   datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
 FROM date_udf_flight_orc
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   year(fl_time) = year(fl_date),
   month(fl_time) = month(fl_date),
   day(fl_time) = day(fl_date),
@@ -663,20 +747,62 @@ POSTHOOK: query: EXPLAIN SELECT
   datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
 FROM date_udf_flight_orc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: date_udf_flight_orc
+                  Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: (year(fl_time) = year(fl_date)) (type: 
boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = 
day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) 
(type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), 
(weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS 
DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: 
boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), 
(date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), 
(datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: 
boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [4, 5, 6, 7, 8, 9, 2, 3, 12, 
13, 16]
+                        selectExpressions: LongColEqualLongColumn(col 2, col 
3)(children: VectorUDFYearTimestamp(col 1, field YEAR) -> 2:long, 
VectorUDFYearDate(col 0, field YEAR) -> 3:long) -> 4:long, 
LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 1, 
field MONTH) -> 2:long, VectorUDFMonthDate(col 0, field MONTH) -> 3:long) -> 
5:long, LongColEqualLongColumn(col 2, col 3)(children: 
VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, 
VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 6:long, 
LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 
1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field 
DAY_OF_MONTH) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 
3)(children: VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 2:long, 
VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 3:long) -> 8:long, 
LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp
 (col 1, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearDate(col 0, field 
WEEK_OF_YEAR) -> 3:long) -> 9:long, LongColEqualLongColumn(col 10, col 
0)(children: CastTimestampToDate(col 1) -> 10:date) -> 2:long, 
LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateTimestamp(col 1) 
-> 10:date, VectorUDFDateLong(col 0) -> 11:date) -> 3:long, 
LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateAddColScalar(col 
1, val 2) -> 10:date, VectorUDFDateAddColScalar(col 0, val 2) -> 11:date) -> 
12:long, LongColEqualLongColumn(col 10, col 11)(children: 
VectorUDFDateSubColScalar(col 1, val 2) -> 10:date, 
VectorUDFDateSubColScalar(col 0, val 2) -> 11:date) -> 13:long, 
LongColEqualLongColumn(col 14, col 15)(children: VectorUDFDateDiffColScalar(col 
1, val 2000-01-01) -> 14:long, VectorUDFDateDiffColScalar(col 0, val 
2000-01-01) -> 15:long) -> 16:long
+                    Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: date_udf_flight_orc
-          Select Operator
-            expressions: (year(fl_time) = year(fl_date)) (type: boolean), 
(month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = 
day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) 
(type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), 
(weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS 
DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: 
boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), 
(date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), 
(datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: 
boolean)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10
-            ListSink
+        ListSink
 
 PREHOOK: query: -- Should all be true or NULL
 SELECT
@@ -849,7 +975,7 @@ true        true    true    true    true    true    true    
true    true    true    true
 true   true    true    true    true    true    true    true    true    true    
true
 true   true    true    true    true    true    true    true    true    true    
true
 true   true    true    true    true    true    true    true    true    true    
true
-PREHOOK: query: EXPLAIN SELECT 
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT 
   fl_date, 
   to_date(date_add(fl_date, 2)), 
   to_date(date_sub(fl_date, 2)),
@@ -858,7 +984,7 @@ PREHOOK: query: EXPLAIN SELECT
   datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) 
 FROM date_udf_flight_orc LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT 
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT 
   fl_date, 
   to_date(date_add(fl_date, 2)), 
   to_date(date_sub(fl_date, 2)),
@@ -867,22 +993,68 @@ POSTHOOK: query: EXPLAIN SELECT
   datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) 
 FROM date_udf_flight_orc LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: date_udf_flight_orc
+                  Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
+                  Select Operator
+                    expressions: fl_date (type: date), 
to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) 
(type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), 
datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 
2), date_sub(fl_date, 2)) (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 3, 4, 5, 6, 8]
+                        selectExpressions: VectorUDFDateLong(col 2)(children: 
VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 3:date, 
VectorUDFDateLong(col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 
2:date) -> 4:date, VectorUDFDateDiffColCol(col 0, col 2)(children: 
VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 5:long, 
VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateSubColScalar(col 
0, val 2) -> 2:date) -> 6:long, VectorUDFDateDiffColCol(col 2, col 7)(children: 
VectorUDFDateAddColScalar(col 0, val 2) -> 2:date, 
VectorUDFDateSubColScalar(col 0, val 2) -> 7:date) -> 8:long
+                    Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                    Limit
+                      Number of rows: 10
+                      Limit Vectorization:
+                          className: VectorLimitOperator
+                          native: true
+                      Statistics: Num rows: 10 Data size: 960 Basic stats: 
COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 10 Data size: 960 Basic stats: 
COMPLETE Column stats: NONE
+                        table:
+                            input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
   Stage: Stage-0
     Fetch Operator
       limit: 10
       Processor Tree:
-        TableScan
-          alias: date_udf_flight_orc
-          Select Operator
-            expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) 
(type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, 
date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) 
(type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-            Limit
-              Number of rows: 10
-              ListSink
+        ListSink
 
 PREHOOK: query: SELECT 
   fl_date, 
@@ -927,7 +1099,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@date_udf_flight_orc
 #### A masked pattern was here ####
 2009-07-30
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   min(fl_date) AS c1,
   max(fl_date),
   count(fl_date),
@@ -935,7 +1107,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM date_udf_flight_orc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   min(fl_date) AS c1,
   max(fl_date),
   count(fl_date),
@@ -943,6 +1115,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM date_udf_flight_orc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -961,43 +1137,101 @@ STAGE PLANS:
                 TableScan
                   alias: date_udf_flight_orc
                   Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1]
                   Select Operator
                     expressions: fl_date (type: date)
                     outputColumnNames: fl_date
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0]
                     Statistics: Num rows: 137 Data size: 13152 Basic stats: 
COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(fl_date), max(fl_date), 
count(fl_date), count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFMinLong(col 0) -> date, 
VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, 
VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          vectorOutput: true
+                          native: false
+                          projectedOutputColumns: [0, 1, 2, 3]
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 128 Basic stats: 
COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkOperator
+                            native: false
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                            nativeConditionsNotMet: Uniform Hash IS false
                         Statistics: Num rows: 1 Data size: 128 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: date), _col1 (type: 
date), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMinLong(col 0) -> date, 
VectorUDAFMaxLong(col 1) -> date, VectorUDAFCountMerge(col 2) -> bigint, 
VectorUDAFCountMerge(col 3) -> bigint
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    native: false
+                    projectedOutputColumns: [0, 1, 2, 3]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: date)
                   sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: Uniform Hash IS false
                   Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col1 (type: date), _col2 (type: bigint), 
_col3 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 
(type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 3]
                 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
index ced9795..0b5d516 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -16,10 +16,14 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dtest
 POSTHOOK: Lineage: dtest.a SCRIPT []
 POSTHOOK: Lineage: dtest.b SIMPLE []
-PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest
+PREHOOK: query: explain vectorization select sum(distinct a), count(distinct 
a) from dtest
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest
+POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct 
a) from dtest
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -57,8 +61,23 @@ STAGE PLANS:
                           value expressions: _col0 (type: bigint), _col1 
(type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), count(VALUE._col1)
@@ -88,10 +107,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dtest
 #### A masked pattern was here ####
 300    1
-PREHOOK: query: explain select sum(distinct cint), count(distinct cint), 
avg(distinct cint), std(distinct cint) from alltypesorc
+PREHOOK: query: explain vectorization select sum(distinct cint), 
count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select sum(distinct cint), count(distinct cint), 
avg(distinct cint), std(distinct cint) from alltypesorc
+POSTHOOK: query: explain vectorization select sum(distinct cint), 
count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -126,8 +149,23 @@ STAGE PLANS:
                         Statistics: Num rows: 5775 Data size: 17248 Basic 
stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: false
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: int)
@@ -145,6 +183,11 @@ STAGE PLANS:
                     value expressions: _col0 (type: bigint), _col1 (type: 
bigint), _col2 (type: struct<count:bigint,sum:double,input:int>), _col3 (type: 
struct<count:bigint,sum:double,variance:double>)
         Reducer 3 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function UDF avg parameter 
expression for GROUPBY operator: Data type 
struct<count:bigint,sum:double,input:int> of Column[VALUE._col2] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0), count(VALUE._col1), 
avg(VALUE._col2), std(VALUE._col3)

[11/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

Reply via email to