http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
index d8003ba..e7d1963 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
@@ -39,7 +39,7 @@ POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION []
POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION []
POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION []
PREHOOK: query: -- constants/cast from string
-explain
+explain vectorization expression
select
str1,
interval '1-2' year to month, interval_year_month(str1),
@@ -47,13 +47,17 @@ select
from vector_interval_1 order by str1
PREHOOK: type: QUERY
POSTHOOK: query: -- constants/cast from string
-explain
+explain vectorization expression
select
str1,
interval '1-2' year to month, interval_year_month(str1),
interval '1 2:3:4' day to second, interval_day_time(str2)
from vector_interval_1 order by str1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -71,26 +75,62 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: str1 (type: string), CAST( str1 AS INTERVAL
YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO
SECOND) (type: interval_day_time)
outputColumnNames: _col0, _col2, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 4, 5]
+ selectExpressions: CastStringToIntervalYearMonth(col
2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) ->
5:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col2 (type: interval_year_month),
_col4 (type: interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), 1-2 (type:
interval_year_month), VALUE._col0 (type: interval_year_month), 1
02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type:
interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 3, 1, 4, 2]
+ selectExpressions: ConstantVectorExpression(val 14) ->
3:long, ConstantVectorExpression(val 1 02:03:04.000000000) ->
4:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -122,7 +162,7 @@ POSTHOOK: Input: default@vector_interval_1
NULL 1-2 NULL 1 02:03:04.000000000 NULL
1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000
PREHOOK: query: -- interval arithmetic
-explain
+explain vectorization expression
select
dt,
interval '1-2' year to month + interval '1-2' year to month,
@@ -134,7 +174,7 @@ select
from vector_interval_1 order by dt
PREHOOK: type: QUERY
POSTHOOK: query: -- interval arithmetic
-explain
+explain vectorization expression
select
dt,
interval '1-2' year to month + interval '1-2' year to month,
@@ -145,6 +185,10 @@ select
interval '1-2' year to month - interval_year_month(str1)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -162,26 +206,62 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR
TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month),
(1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month),
(CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH))
(type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH))
(type: interval_year_month)
outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 6, 5, 8, 7]
+ selectExpressions:
IntervalYearMonthColAddIntervalYearMonthColumn(col 4, col 5)(children:
CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month,
CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long,
IntervalYearMonthScalarAddIntervalYearMonthColumn(val 14, col 4)(children:
CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 5:long,
IntervalYearMonthColSubtractIntervalYearMonthColumn(col 4, col 7)(children:
CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month,
CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long,
IntervalYearMonthScalarSubtractIntervalYearMonthColumn(val 14, col 4)(children:
CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 7:long
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col2 (type: interval_year_month),
_col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6
(type: interval_year_month)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), 2-4 (type:
interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1
(type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2
(type: interval_year_month), VALUE._col3 (type: interval_year_month)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+ selectExpressions: ConstantVectorExpression(val 28) ->
5:long, ConstantVectorExpression(val 0) -> 6:long
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -220,7 +300,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL 2-4 NULL NULL 0-0 NULL NULL
2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
dt,
interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -231,7 +311,7 @@ select
interval '1 2:3:4' day to second - interval_day_time(str2)
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
dt,
interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -242,6 +322,10 @@ select
interval '1 2:3:4' day to second - interval_day_time(str2)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -259,26 +343,62 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY
TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time),
(1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type:
interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS
INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 -
CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time)
outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 6, 5, 8, 7]
+ selectExpressions:
IntervalDayTimeColAddIntervalDayTimeColumn(col 4, col 5)(children:
CastStringToIntervalDayTime(col 3) -> 4:interval_day_time,
CastStringToIntervalDayTime(col 3) -> 5:interval_day_time) ->
6:interval_day_time, IntervalDayTimeScalarAddIntervalDayTimeColumn(val 1
02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) ->
4:interval_day_time) -> 5:timestamp,
IntervalDayTimeColSubtractIntervalDayTimeColumn(col 4, col 7)(children:
CastStringToIntervalDayTime(col 3) -> 4:interval_day_time,
CastStringToIntervalDayTime(col 3) -> 7:interval_day_time) ->
8:interval_day_time, IntervalDayTimeScalarSubtractIntervalDayTimeColumn(val 1
02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) ->
4:interval_day_time) -> 7:timestamp
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col2 (type: interval_day_time),
_col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type:
interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), 2
04:06:08.000000000 (type: interval_day_time), VALUE._col1 (type:
interval_day_time), VALUE._col2 (type: interval_day_time), 0 00:00:00.000000000
(type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5
(type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+ selectExpressions: ConstantVectorExpression(val 2
04:06:08.000000000) -> 5:interval_day_time, ConstantVectorExpression(val 0
00:00:00.000000000) -> 6:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -318,7 +438,7 @@ POSTHOOK: Input: default@vector_interval_1
NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL
NULL
2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2
04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0
00:00:00.000000000
PREHOOK: query: -- date-interval arithmetic
-explain
+explain vectorization expression
select
dt,
dt + interval '1-2' year to month,
@@ -336,7 +456,7 @@ select
from vector_interval_1 order by dt
PREHOOK: type: QUERY
POSTHOOK: query: -- date-interval arithmetic
-explain
+explain vectorization expression
select
dt,
dt + interval '1-2' year to month,
@@ -353,6 +473,10 @@ select
dt - interval_day_time(str2)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -370,26 +494,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (dt + 1-2) (type: date), (dt
+ CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date),
(CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type:
date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1
02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO
SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp),
(CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1
02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO
SECOND)) (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 4, 6, 7, 8, 9, 10, 11, 13,
14, 15, 16, 17]
+ selectExpressions:
DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 4:long,
DateColAddIntervalYearMonthColumn(col 1, col 5)(children:
CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long,
IntervalYearMonthScalarAddDateColumn(val 1-2, col 1) -> 7:long,
IntervalYearMonthColAddDateColumn(col 5, col 1)(children:
CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:long,
DateColSubtractIntervalYearMonthScalar(col 1, val 1-2) -> 9:long,
DateColSubtractIntervalYearMonthColumn(col 1, col 5)(children:
CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:long,
DateColAddIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) ->
11:timestamp, DateColAddIntervalDayTimeColumn(col 1, col 12)(children:
CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp,
IntervalDayTimeScalarAddDateColumn(val 1 02:03:04.000000000, col 1) ->
14:timestamp, IntervalDayTimeColAddDateColumn(col 12, col 1)(children:
CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) ->
15:interval_day_time, DateColSubtractIntervalDayTimeScalar(col 1, val 1
02:03:04.000000000) -> 16:timestamp, DateColSubtractIntervalDayTimeColumn(col
1, col 12)(children: CastStringToIntervalDayTime(col 3) ->
12:interval_day_time) -> 17:timestamp
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col1 (type: date), _col2 (type:
date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type:
date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type:
timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type:
timestamp)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), VALUE._col0
(type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3
(type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6
(type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type:
timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp),
VALUE._col11 (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -441,7 +600,7 @@ POSTHOOK: Input: default@vector_interval_1
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL
2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01
1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04
2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56
2000-12-30 21:56:56
PREHOOK: query: -- timestamp-interval arithmetic
-explain
+explain vectorization expression
select
ts,
ts + interval '1-2' year to month,
@@ -459,7 +618,7 @@ select
from vector_interval_1 order by ts
PREHOOK: type: QUERY
POSTHOOK: query: -- timestamp-interval arithmetic
-explain
+explain vectorization expression
select
ts,
ts + interval '1-2' year to month,
@@ -476,6 +635,10 @@ select
ts - interval_day_time(str2)
from vector_interval_1 order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -493,26 +656,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: ts (type: timestamp), (ts + 1-2) (type:
timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp),
(1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts)
(type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL
YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type:
timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1
02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO
SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp),
(ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 4, 6, 7, 8, 9, 10, 11, 13,
14, 15, 16, 17]
+ selectExpressions:
TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 4:timestamp,
TimestampColAddIntervalYearMonthColumn(col 0, col 5)(children:
CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:timestamp,
IntervalYearMonthScalarAddTimestampColumn(val 1-2, col 0) -> 7:timestamp,
IntervalYearMonthColAddTimestampColumn(col 5, col 0)(children:
CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:timestamp,
TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-2) -> 9:timestamp,
TimestampColSubtractIntervalYearMonthColumn(col 0, col 5)(children:
CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:timestamp,
TimestampColAddIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) ->
11:timestamp, TimestampColAddIntervalDayTimeColumn(col 0, col 12)(children:
CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp,
IntervalDayTimeScalarAddTimestampColumn(val 1 02:03:04.000000000, col 0) ->
14:timestamp, IntervalDayTimeColAddTimestampColumn(col 12, col 0)(children:
CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:timestamp,
TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) ->
16:timestamp, TimestampColSubtractIntervalDayTimeColumn(col 0, col
12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) ->
17:timestamp
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col1 (type: timestamp), _col2 (type:
timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type:
timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type:
timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type:
timestamp), _col12 (type: timestamp)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0
(type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type:
timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp),
VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7
(type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type:
timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -564,7 +762,7 @@ POSTHOOK: Input: default@vector_interval_1
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL
2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03
2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03
1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07
2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59
2000-12-30 22:58:59
PREHOOK: query: -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
select
ts,
ts - ts,
@@ -573,7 +771,7 @@ select
from vector_interval_1 order by ts
PREHOOK: type: QUERY
POSTHOOK: query: -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
select
ts,
ts - ts,
@@ -581,6 +779,10 @@ select
ts - timestamp '2001-01-01 01:02:03'
from vector_interval_1 order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -598,26 +800,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: ts (type: timestamp), (ts - ts) (type:
interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts
- 2001-01-01 01:02:03.0) (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 4, 5, 6]
+ selectExpressions:
TimestampColSubtractTimestampColumn(col 0, col 0) -> 4:interval_day_time,
TimestampScalarSubtractTimestampColumn(val 2001-01-01 01:02:03.0, col 0) ->
5:timestamp, TimestampColSubtractTimestampScalar(col 0, val 2001-01-01
01:02:03.0) -> 6:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col1 (type: interval_day_time),
_col2 (type: interval_day_time), _col3 (type: interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0
(type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2
(type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -651,7 +888,7 @@ POSTHOOK: Input: default@vector_interval_1
NULL NULL NULL NULL
2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0
00:00:00.000000000
PREHOOK: query: -- date-date arithmetic
-explain
+explain vectorization expression
select
dt,
dt - dt,
@@ -660,7 +897,7 @@ select
from vector_interval_1 order by dt
PREHOOK: type: QUERY
POSTHOOK: query: -- date-date arithmetic
-explain
+explain vectorization expression
select
dt,
dt - dt,
@@ -668,6 +905,10 @@ select
dt - date '2001-01-01'
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -685,26 +926,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (dt - dt) (type:
interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt -
2001-01-01) (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 4, 5, 6]
+ selectExpressions: DateColSubtractDateColumn(col 1,
col 1) -> 4:timestamp, DateScalarSubtractDateColumn(val 2001-01-01 00:00:00.0,
col 1) -> 5:timestamp, DateColSubtractDateScalar(col 1, val 2001-01-01
00:00:00.0) -> 6:timestamp
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col1 (type: interval_day_time),
_col2 (type: interval_day_time), _col3 (type: interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), VALUE._col0
(type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2
(type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -738,7 +1014,7 @@ POSTHOOK: Input: default@vector_interval_1
NULL NULL NULL NULL
2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0
00:00:00.000000000
PREHOOK: query: -- date-timestamp arithmetic
-explain
+explain vectorization expression
select
dt,
ts - dt,
@@ -750,7 +1026,7 @@ select
from vector_interval_1 order by dt
PREHOOK: type: QUERY
POSTHOOK: query: -- date-timestamp arithmetic
-explain
+explain vectorization expression
select
dt,
ts - dt,
@@ -761,6 +1037,10 @@ select
date '2001-01-01' - ts
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -778,26 +1058,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (ts - dt) (type:
interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts
- 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time),
(dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts)
(type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 4, 5, 6, 7, 8, 9]
+ selectExpressions: TimestampColSubtractDateColumn(col
0, col 1) -> 4:interval_day_time, TimestampScalarSubtractDateColumn(val
2001-01-01 01:02:03.0, col 1) -> 5:interval_day_time,
TimestampColSubtractDateScalar(col 0, val 2001-01-01 00:00:00.0) ->
6:interval_day_time, DateColSubtractTimestampColumn(col 1, col 0) ->
7:interval_day_time, DateColSubtractTimestampScalar(col 1, val 2001-01-01
01:02:03.0) -> 8:interval_day_time, DateScalarSubtractTimestampColumn(val
2001-01-01 00:00:00.0, col 0) -> 9:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for
keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats:
COMPLETE Column stats: NONE
value expressions: _col1 (type: interval_day_time),
_col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type:
interval_day_time), _col5 (type: interval_day_time), _col6 (type:
interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), VALUE._col0
(type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2
(type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4
(type: interval_day_time), VALUE._col5 (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE
Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat