http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out index d0efe00..d06ae66 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out @@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@vsmb_bucket_txt POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -117,33 +121,71 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: 
+ Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE 
value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -187,12 +229,16 @@ POSTHOOK: Input: default@vsmb_bucket_2 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -210,17 +256,36 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -237,6 +302,10 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -280,12 +349,16 @@ POSTHOOK: Input: default@vsmb_bucket_rc 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -303,17 +376,36 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true 
+ projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -330,6 +422,10 @@ STAGE PLANS: value expressions: value (type: string) Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: false + enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat Reducer 2 Execution mode: llap Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_case.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 6e13369..b58e707 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -34,6 +34,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -48,15 +52,30 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') 
ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -64,6 +83,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator @@ -113,7 +140,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select csmallint, case @@ -131,7 +158,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select csmallint, case @@ -149,6 
+176,10 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -163,15 +194,30 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean) Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 14, 15] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:string, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:string Statistics: Num rows: 6 
Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -179,6 +225,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out index c377ac8..db8df66 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization select cast (ctinyint as boolean) @@ -72,7 +72,7 @@ from alltypesorc where cbigint % 250 = 0 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select cast (ctinyint as boolean) @@ -146,6 +146,10 @@ from alltypesorc where cbigint % 250 = 0 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -176,6 +180,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + 
Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_context.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out index 1f70a01..855a50f 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out @@ -82,20 +82,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc POSTHOOK: Output: default@household_demographics POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] -PREHOOK: query: explain +PREHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization select store.s_city, ss_net_profit from store_sales JOIN store ON store_sales.ss_store_sk = store.s_store_sk JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -128,6 +132,14 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -176,6 +188,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 3 Map Operator Tree: TableScan @@ -195,6 +215,14 @@ STAGE PLANS: Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index 40f5b73..9ed03e2 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -197,7 +197,7 @@ date_udf_flight_orc.fl_date date_udf_flight_orc.fl_time 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 2010-10-31 2010-10-31 07:00:00 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: 
EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, to_unix_timestamp(fl_time), year(fl_time), @@ -220,7 +220,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_time, timestamp "2007-03-14 08:21:59") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, to_unix_timestamp(fl_time), year(fl_time), @@ -244,20 +244,62 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: fl_time (type: timestamp), to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int), datediff(fl_time, 2000-01-01) (type: int), datediff(fl_time, 2000-01-01 00:00:00.0) (type: int), datediff(fl_time, 2000-01-01 11:13:09.0) (type: int), datediff(fl_time, '2007-03-14') (type: int), datediff(fl_time, 2007-03-14) (type: int), datediff(fl_time, 2007-03-14 00:00:00.0) (type: int), datediff(fl_time, 2007-03-14 08:21:59.0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, 
_col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1) -> 2:long, VectorUDFYearTimestamp(col 1, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 1, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 7:long, VectorUDFWeekOfYearTimestamp(col 1, field WEEK_OF_YEAR) -> 8:long, CastTimestampToDate(col 1) -> 9:date, VectorUDFDateTimestamp(col 1) -> 10:date, VectorUDFDateAddColScalar(col 1, val 2) -> 11:date, VectorUDFDateSubColScalar(col 1, val 2) -> 12:date, VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 13:long, VectorUDFDateDiffColScalar(col 1, val NULL) -> 14:long, VectorUDFDateDiffColScalar(col 1, val NULL) -> 15:long, VectorUDFDateDiffColScalar(col 1, val NULL) -> 16:long, VectorUDFDateDiffColScalar(col 1, val 2007-03-14) -> 17:long, VectorUDFDateDiffColScalar(col 1, val NULL) -> 18:long, VectorUDFDateDiffColScalar(col 1, val NULL) -> 19:long, VectorUDFDateDiffColScalar(col 1, val NULL) -> 20:long + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_time (type: timestamp), to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int), datediff(fl_time, 2000-01-01) (type: int), datediff(fl_time, 2000-01-01 00:00:00.0) (type: int), datediff(fl_time, 2000-01-01 11:13:09.0) (type: int), datediff(fl_time, '2007-03-14') (type: int), datediff(fl_time, 2007-03-14) (type: int), datediff(fl_time, 2007-03-14 00:00:00.0) (type: int), datediff(fl_time, 2007-03-14 08:21:59.0) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - ListSink + ListSink PREHOOK: query: SELECT fl_time, @@ -447,7 +489,7 @@ fl_time _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 _c15 _c16 _ 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 07:00:00 1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_unix_timestamp(fl_date), year(fl_date), @@ -470,7 +512,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, timestamp "2007-03-14 08:21:59") 
FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_unix_timestamp(fl_date), year(fl_date), @@ -494,20 +536,62 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: fl_date (type: date), to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int), datediff(fl_date, 2000-01-01) (type: int), datediff(fl_date, 2000-01-01 00:00:00.0) (type: int), datediff(fl_date, 2000-01-01 11:13:09.0) (type: int), datediff(fl_date, '2007-03-14') (type: int), datediff(fl_date, 2007-03-14) (type: int), datediff(fl_date, 2007-03-14 00:00:00.0) (type: int), datediff(fl_date, 2007-03-14 08:21:59.0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7, 8, 0, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + selectExpressions: 
VectorUDFUnixTimeStampDate(col 0) -> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long, VectorUDFMonthDate(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 7:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 8:long, VectorUDFDateLong(col 0) -> 9:date, VectorUDFDateAddColScalar(col 0, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0, val 2) -> 11:date, VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 12:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 13:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 14:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 15:long, VectorUDFDateDiffColScalar(col 0, val 2007-03-14) -> 16:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 17:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 18:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 19:long + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_date (type: date), 
to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int), datediff(fl_date, 2000-01-01) (type: int), datediff(fl_date, 2000-01-01 00:00:00.0) (type: int), datediff(fl_date, 2000-01-01 11:13:09.0) (type: int), datediff(fl_date, '2007-03-14') (type: int), datediff(fl_date, 2007-03-14) (type: int), datediff(fl_date, 2007-03-14 00:00:00.0) (type: int), datediff(fl_date, 2007-03-14 08:21:59.0) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - ListSink + ListSink PREHOOK: query: SELECT fl_date, @@ -697,7 +781,7 @@ fl_date _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 _c15 _c16 _ 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 2010-10-31 1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956 3956 3956 3956 1327 1327 1327 1327 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, fl_date, year(fl_time) = year(fl_date), @@ -722,7 +806,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(fl_date, "2007-03-14") = datediff(fl_date, date "2007-03-14") FROM date_udf_flight_orc PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_time, fl_date, year(fl_time) = year(fl_date), @@ -748,20 +832,62 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc POSTHOOK: type: QUERY Explain +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: fl_time (type: timestamp), fl_date (type: date), (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean), (datediff(fl_time, 2000-01-01) = datediff(fl_date, 2000-01-01)) (type: boolean), (datediff(fl_time, 2000-01-01 00:00:00.0) = datediff(fl_date, 2000-01-01 00:00:00.0)) (type: boolean), (datediff(fl_time, 2000-01-01 11:13:09.0) = datediff(fl_date, 2000-01-01 11:13:09.0)) (type: boolean), (datediff(fl_time, '2007-03-14') = datediff(fl_date, '2007-03-14')) (type: boolean), (datediff(fl_time, 2007-03-14) = datediff(fl_date, 2007-03-14)) (type: boolean), (datediff(fl_time, 2007-03-14 00:00:00.0) = datediff(fl_date, 2007-03-14 00:00:00.0)) (type: boolean), (datediff(fl_time, 2007-03-14 08:21:59.0) = datediff(fl_date, 2007-03-14 08:21:59.0)) (type: boolean), (datediff(fl_date, '2000-01-01') = datediff(fl_date, 2000-01-01)) (type: boolean), 
(datediff(fl_date, '2007-03-14') = datediff(fl_date, 2007-03-14)) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 4, 5, 6, 7, 8, 9, 3, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 1, field YEAR) -> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 1, field MONTH) -> 2:long, VectorUDFMonthDate(col 0, field MONTH) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 2:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp (col 1, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 0)(children: CastTimestampToDate(col 1) -> 2:date) -> 3:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateTimestamp(col 1) -> 2:date, VectorUDFDateLong(col 0) -> 10:date) -> 11:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateAddColScalar(col 1, val 2) -> 2:date, VectorUDFDateAddColScalar(col 0, val 2) -> 10:date) -> 12:long, LongColEqualLongColumn(col 2, col 10)(children: 
VectorUDFDateSubColScalar(col 1, val 2) -> 2:date, VectorUDFDateSubColScalar(col 0, val 2) -> 10:date) -> 13:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 2:long, VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 10:long) -> 14:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 1, val NULL) -> 2:long, VectorUDFDateDiffColScalar(col 0, val N ULL) -> 10:long) -> 15:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 1, val NULL) -> 2:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 10:long) -> 16:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 1, val NULL) -> 2:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 10:long) -> 17:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 1, val 2007-03-14) -> 2:long, VectorUDFDateDiffColScalar(col 0, val 2007-03-14) -> 10:long) -> 18:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 1, val NULL) -> 2:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 10:long) -> 19:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 1, val NULL) -> 2:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 10:long) -> 20:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 1, val NULL) -> 2:long , VectorUDFDateDiffColScalar(col 0, val NULL) -> 10:long) -> 21:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 2:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 10:long) -> 22:long, LongColEqualLongColumn(col 2, col 10)(children: VectorUDFDateDiffColScalar(col 0, val 2007-03-14) -> 2:long, VectorUDFDateDiffColScalar(col 0, val NULL) -> 10:long) -> 23:long + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File 
Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_time (type: timestamp), fl_date (type: date), (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean), (datediff(fl_time, 2000-01-01) = datediff(fl_date, 2000-01-01)) (type: boolean), (datediff(fl_time, 2000-01-01 00:00:00.0) = datediff(fl_date, 2000-01-01 00:00:00.0)) (type: boolean), (datediff(fl_time, 2000-01-01 11:13:09.0) = datediff(fl_date, 2000-01-01 11:13:09.0)) (type: boolean), (datediff(fl_time, '2007-03-14') = datediff(fl_date, '2007-03-14')) (type: boolean), (datediff(fl_time, 2007-03-14) = datediff(fl_date, 2007-03-14)) (type: boolean), (datediff(fl_time, 
2007-03-14 00:00:00.0) = datediff(fl_date, 2007-03-14 00:00:00.0)) (type: boolean), (datediff(fl_time, 2007-03-14 08:21:59.0) = datediff(fl_date, 2007-03-14 08:21:59.0)) (type: boolean), (datediff(fl_date, '2000-01-01') = datediff(fl_date, 2000-01-01)) (type: boolean), (datediff(fl_date, '2007-03-14') = datediff(fl_date, 2007-03-14)) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - ListSink + ListSink PREHOOK: query: SELECT fl_time, @@ -955,7 +1081,7 @@ fl_time fl_date _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 _c10 _c11 _c12 _c13 _c14 _c15 _c 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true 2010-10-31 07:00:00 2010-10-31 true true true true true true true true true true true true true true true true true true true true -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -964,7 +1090,7 @@ PREHOOK: query: EXPLAIN SELECT datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) FROM date_udf_flight_orc LIMIT 10 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT fl_date, to_date(date_add(fl_date, 2)), to_date(date_sub(fl_date, 2)), @@ -974,22 +1100,68 @@ POSTHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc LIMIT 10 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + 
Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 4, 5, 6, 8] + selectExpressions: VectorUDFDateLong(col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 3:date, VectorUDFDateLong(col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 4:date, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 5:long, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 6:long, VectorUDFDateDiffColCol(col 2, col 7)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date, VectorUDFDateSubColScalar(col 0, val 2) -> 7:date) -> 8:long + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: date_udf_flight_orc - Select Operator - expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Limit - Number of rows: 10 - ListSink + ListSink PREHOOK: query: SELECT fl_date, @@ -1034,7 +1206,7 @@ POSTHOOK: Input: default@date_udf_flight_orc #### A masked pattern was here #### _c0 2009-07-30 -PREHOOK: query: EXPLAIN SELECT +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -1042,7 +1214,7 @@ PREHOOK: query: EXPLAIN SELECT FROM date_udf_flight_orc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN SELECT +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT min(fl_date) AS c1, max(fl_date), count(fl_date), @@ -1051,6 +1223,10 @@ FROM date_udf_flight_orc ORDER BY c1 POSTHOOK: type: QUERY Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1069,43 +1245,101 @@ STAGE PLANS: TableScan alias: date_udf_flight_orc Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: fl_date (type: date) outputColumnNames: fl_date + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(fl_date), max(fl_date), count(fl_date), count() + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group 
By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 1) -> date, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint + className: VectorGroupByOperator + vectorOutput: true + native: false + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: Uniform Hash IS false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: 
VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index 433bcba..6000ec4 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -16,10 +16,14 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dtest POSTHOOK: Lineage: dtest.a SCRIPT [] POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +PREHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -57,8 +61,23 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -88,10 +107,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -126,8 +149,23 @@ STAGE PLANS: Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + groupByVectorOutput: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator keys: KEY._col0 
(type: int) @@ -145,6 +183,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct<count:bigint,sum:double,input:int>), _col3 (type: struct<count:bigint,sum:double,variance:double>) Reducer 3 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:int> of Column[VALUE._col2] not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2), std(VALUE._col3)