http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query57.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out
b/ql/src/test/results/clientpositive/perf/spark/query57.q.out
index 53b6778..51e644a 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
with v1 as(
select i_category, i_brand,
cc_name,
@@ -45,7 +45,7 @@ with v1 as(
order by sum_sales - avg_monthly_sales, 3
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
with v1 as(
select i_category, i_brand,
cc_name,
@@ -92,10 +92,6 @@ with v1 as(
order by sum_sales - avg_monthly_sales, 3
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-3 depends on stages: Stage-2
@@ -114,40 +110,18 @@ STAGE PLANS:
alias: call_center
filterExpr: (cc_call_center_sk is not null and cc_name is
not null) (type: boolean)
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 6:string))
predicate: (cc_call_center_sk is not null and cc_name is
not null) (type: boolean)
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: cc_call_center_sk (type: int), cc_name
(type: string)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6]
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -161,40 +135,18 @@ STAGE PLANS:
alias: call_center
filterExpr: (cc_call_center_sk is not null and cc_name is
not null) (type: boolean)
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 6:string))
predicate: (cc_call_center_sk is not null and cc_name is
not null) (type: boolean)
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: cc_call_center_sk (type: int), cc_name
(type: string)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6]
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -208,40 +160,18 @@ STAGE PLANS:
alias: call_center
filterExpr: (cc_call_center_sk is not null and cc_name is
not null) (type: boolean)
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 6:string))
predicate: (cc_call_center_sk is not null and cc_name is
not null) (type: boolean)
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: cc_call_center_sk (type: int), cc_name
(type: string)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6]
Statistics: Num rows: 60 Data size: 122700 Basic stats:
COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col1 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -271,395 +201,183 @@ STAGE PLANS:
alias: catalog_sales
filterExpr: (cs_item_sk is not null and cs_sold_date_sk is
not null and cs_call_center_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int),
SelectColumnIsNotNull(col 11:int))
predicate: (cs_call_center_sk is not null and cs_item_sk
is not null and cs_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cs_sold_date_sk (type: int),
cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type:
decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 11, 15, 21]
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type:
int), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 10
Map Operator Tree:
TableScan
alias: item
filterExpr: (i_item_sk is not null and i_category is not
null and i_brand is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 12:string),
SelectColumnIsNotNull(col 8:string))
predicate: (i_brand is not null and i_category is not null
and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_brand (type:
string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8, 12]
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 462000 Data size: 663560457
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type:
string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 11
Map Operator Tree:
TableScan
alias: catalog_sales
filterExpr: (cs_item_sk is not null and cs_sold_date_sk is
not null and cs_call_center_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int),
SelectColumnIsNotNull(col 11:int))
predicate: (cs_call_center_sk is not null and cs_item_sk
is not null and cs_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cs_sold_date_sk (type: int),
cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type:
decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 11, 15, 21]
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type:
int), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 17
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy
= 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type:
boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000),
FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999),
FilterLongColEqualLongScalar(col 8:int, val 12)), FilterExprAndExpr(children:
FilterLongColEqualLongScalar(col 6:int, val 2001),
FilterLongColEqualLongScalar(col 8:int, val 1))), SelectColumnIsNotNull(col
0:int))
predicate: (((d_year = 2000) or ((d_year = 1999) and
(d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null)
(type: boolean)
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_year (type: int),
d_moy (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 8]
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 19
Map Operator Tree:
TableScan
alias: item
filterExpr: (i_item_sk is not null and i_category is not
null and i_brand is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 12:string),
SelectColumnIsNotNull(col 8:string))
predicate: (i_brand is not null and i_category is not null
and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_brand (type:
string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8, 12]
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 462000 Data size: 663560457
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type:
string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 20
Map Operator Tree:
TableScan
alias: catalog_sales
filterExpr: (cs_item_sk is not null and cs_sold_date_sk is
not null and cs_call_center_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int),
SelectColumnIsNotNull(col 11:int))
predicate: (cs_call_center_sk is not null and cs_item_sk
is not null and cs_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cs_sold_date_sk (type: int),
cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type:
decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 11, 15, 21]
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 287989836 Data size: 38999608952
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type:
int), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 25
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy
= 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type:
boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000),
FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999),
FilterLongColEqualLongScalar(col 8:int, val 12)), FilterExprAndExpr(children:
FilterLongColEqualLongScalar(col 6:int, val 2001),
FilterLongColEqualLongScalar(col 8:int, val 1))), SelectColumnIsNotNull(col
0:int))
predicate: (((d_year = 2000) or ((d_year = 1999) and
(d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null)
(type: boolean)
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_year (type: int),
d_moy (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 8]
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 27
Map Operator Tree:
TableScan
alias: item
filterExpr: (i_item_sk is not null and i_category is not
null and i_brand is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 12:string),
SelectColumnIsNotNull(col 8:string))
predicate: (i_brand is not null and i_category is not null
and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_brand (type:
string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8, 12]
Statistics: Num rows: 462000 Data size: 663560457 Basic
stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 462000 Data size: 663560457
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type:
string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 8
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy
= 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type:
boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000),
FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999),
FilterLongColEqualLongScalar(col 8:int, val 12)), FilterExprAndExpr(children:
FilterLongColEqualLongScalar(col 6:int, val 2001),
FilterLongColEqualLongScalar(col 8:int, val 1))), SelectColumnIsNotNull(col
0:int))
predicate: (((d_year = 2000) or ((d_year = 1999) and
(d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null)
(type: boolean)
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_year (type: int),
d_moy (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 8]
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73048 Data size: 81740712 Basic
stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 12
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -686,11 +404,6 @@ STAGE PLANS:
Statistics: Num rows: 348467716 Data size: 47189528877
Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: decimal(7,2)), _col5
(type: int), _col6 (type: int), _col8 (type: string)
Reducer 13
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -714,23 +427,9 @@ STAGE PLANS:
value expressions: _col5 (type: decimal(17,2))
Reducer 14
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 5:decimal(17,2)) ->
decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int, col 1:int, col 2:string, col
3:string, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2
(type: string), KEY._col3 (type: string), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -739,28 +438,14 @@ STAGE PLANS:
key expressions: _col4 (type: string), _col3 (type: string),
_col2 (type: string), _col0 (type: int)
sort order: ++++
Map-reduce partition columns: _col4 (type: string), _col3
(type: string), _col2 (type: string), _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 191657247 Data size: 25954241376 Basic
stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col5 (type:
decimal(17,2))
Reducer 15
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey3 (type: int), VALUE._col0
(type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type:
string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3, 4, 2, 1, 0, 5]
Statistics: Num rows: 191657247 Data size: 25954241376 Basic
stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -781,38 +466,18 @@ STAGE PLANS:
name: avg
window function:
GenericUDAFAverageEvaluatorDecimal
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- PTF Vectorization:
- className: VectorPTFOperator
- evaluatorClasses: [VectorPTFEvaluatorDecimalAvg]
- functionInputExpressions: [col 5:decimal(17,2)]
- functionNames: [avg]
- native: true
- orderExpressions: [col 0:string, col 1:string, col
2:string, col 3:int]
Statistics: Num rows: 191657247 Data size: 25954241376 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: avg_window_0 (type: decimal(21,6)), _col0
(type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string),
_col4 (type: string), _col5 (type: decimal(17,2))
outputColumnNames: avg_window_0, _col0, _col1, _col2,
_col3, _col4, _col5
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [6, 3, 4, 2, 1, 0, 5]
Statistics: Num rows: 191657247 Data size: 25954241376
Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: string), _col3 (type:
string), _col2 (type: string), _col0 (type: int), _col1 (type: int)
sort order: +++++
Map-reduce partition columns: _col4 (type: string),
_col3 (type: string), _col2 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 191657247 Data size: 25954241376
Basic stats: COMPLETE Column stats: NONE
value expressions: avg_window_0 (type: decimal(21,6)),
_col5 (type: decimal(17,2))
Reducer 16
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument
expression of aggregation function rank
- vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: decimal(21,6)),
KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int),
KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string),
KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2))
@@ -862,11 +527,6 @@ STAGE PLANS:
Reducer 2
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -895,11 +555,6 @@ STAGE PLANS:
Reducer 21
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -926,11 +581,6 @@ STAGE PLANS:
Statistics: Num rows: 348467716 Data size: 47189528877
Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: decimal(7,2)), _col5
(type: int), _col6 (type: int), _col8 (type: string)
Reducer 22
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -954,23 +604,9 @@ STAGE PLANS:
value expressions: _col5 (type: decimal(17,2))
Reducer 23
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 5:decimal(17,2)) ->
decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int, col 1:int, col 2:string, col
3:string, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2
(type: string), KEY._col3 (type: string), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -979,18 +615,9 @@ STAGE PLANS:
key expressions: _col4 (type: string), _col3 (type: string),
_col2 (type: string), _col0 (type: int), _col1 (type: int)
sort order: +++++
Map-reduce partition columns: _col4 (type: string), _col3
(type: string), _col2 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 191657247 Data size: 25954241376 Basic
stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: decimal(17,2))
Reducer 24
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument
expression of aggregation function rank
- vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey3 (type: int),
KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string),
KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string),
VALUE._col0 (type: decimal(17,2))
@@ -1031,11 +658,6 @@ STAGE PLANS:
Statistics: Num rows: 191657247 Data size: 25954241376
Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: decimal(17,2))
Reducer 3
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1059,23 +681,9 @@ STAGE PLANS:
value expressions: _col5 (type: decimal(17,2))
Reducer 4
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 5:decimal(17,2)) ->
decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int, col 1:int, col 2:string, col
3:string, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2
(type: string), KEY._col3 (type: string), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -1084,18 +692,9 @@ STAGE PLANS:
key expressions: _col4 (type: string), _col3 (type: string),
_col2 (type: string), _col0 (type: int), _col1 (type: int)
sort order: +++++
Map-reduce partition columns: _col4 (type: string), _col3
(type: string), _col2 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 191657247 Data size: 25954241376 Basic
stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: decimal(17,2))
Reducer 5
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument
expression of aggregation function rank
- vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey3 (type: int),
KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string),
KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string),
VALUE._col0 (type: decimal(17,2))
@@ -1136,11 +735,6 @@ STAGE PLANS:
Statistics: Num rows: 191657247 Data size: 25954241376
Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: decimal(17,2))
Reducer 6
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1164,32 +758,16 @@ STAGE PLANS:
value expressions: _col0 (type: string), _col1 (type:
string), _col3 (type: int), _col4 (type: decimal(21,6)), _col5 (type:
decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
Reducer 7
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type:
string), KEY.reducesinkkey1 (type: int), VALUE._col2 (type: int), VALUE._col3
(type: decimal(21,6)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type:
decimal(17,2)), VALUE._col6 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2, 3, 1, 4, 5, 6, 7, 8]
Statistics: Num rows: 421645952 Data size: 57099332264 Basic
stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
Statistics: Num rows: 100 Data size: 13500 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 100 Data size: 13500 Basic stats:
COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat