http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query8.q.out b/ql/src/test/results/clientpositive/perf/spark/query8.q.out
index c231df7..6b14eb9 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query8.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query8.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select s_store_name
,sum(ss_net_profit)
from store_sales
@@ -105,7 +105,7 @@ select s_store_name
order by s_store_name
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select s_store_name
,sum(ss_net_profit)
from store_sales
@@ -212,10 +212,6 @@ select s_store_name
order by s_store_name
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-3 is a root stage
Stage-2 depends on stages: Stage-3
@@ -239,76 +235,34 @@ STAGE PLANS:
alias: customer
filterExpr: ((c_preferred_cust_flag = 'Y') and
c_current_addr_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
FilterStringGroupColEqualStringScalar(col 10:string, val Y),
SelectColumnIsNotNull(col 4:int))
predicate: ((c_preferred_cust_flag = 'Y') and
c_current_addr_sk is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 34400807926
Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_current_addr_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [4]
Statistics: Num rows: 40000000 Data size: 34400807926
Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 40000000 Data size: 34400807926
Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 6
Map Operator Tree:
TableScan
alias: customer_address
filterExpr: ((substr(ca_zip, 1, 5)) IN ('89436', '30868',
'65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502',
'32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362',
'87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055',
'18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819',
'40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223',
'82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425',
'32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249',
'48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936',
'34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497',
'84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067',
'62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022',
'49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793',
'25002', '27413', '39736', '47208', '16515', '94808',
'57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468',
'34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163',
'48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851',
'83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792',
'11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670',
'96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439',
'22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355',
'21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399',
'26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611',
'44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226',
'71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799',
'60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713',
'70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539',
'35901', '19506', '65690', '73957', '71850', '49231',
'14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369',
'95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464',
'22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868',
'45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627',
'53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232',
'57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619',
'25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375',
'49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969',
'43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144',
'70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146',
'15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354',
'19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223',
'88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047',
'94167', '82564', '21156', '15799', '86709', '37931', '74703',
'83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961',
'70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450',
'89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871',
'48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132',
'55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734',
'40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621',
'11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036',
'99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562',
'72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586',
'79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type:
boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
FilterStringColumnInList(col 14, values 89436, 30868, 65085, 22977, 83927,
77557, 58429, 40697, 80614, 10502, 32779, 91137, 61265, 98294, 17921, 18427,
21203, 59362, 87291, 84093, 21505, 17184, 10866, 67898, 25797, 28055, 18377,
80332, 74535, 21757, 29742, 90885, 29898, 17819, 40811, 25990, 47513, 89531,
91068, 10391, 18846, 99223, 82637, 41368, 83658, 86199, 81625, 26696, 89338,
88425, 32200, 81427, 19053, 77471, 36610, 99823, 43276, 41249, 48584, 83550,
82276, 18842, 78890, 14090, 38123, 40936, 34425, 19850, 43286, 80072, 79188,
54191, 11395, 50497, 84861, 90733, 21068, 57666, 37119, 25004, 57835, 70067,
62878, 95806, 19303, 18840, 19124, 29785, 16737, 16022, 49613, 89977, 68310,
60069, 98360, 48649, 39050, 41793, 25002, 27413, 39736, 47208, 16515, 94808,
57648, 15009, 80015, 42961, 63982, 21744, 71853, 81087, 67468, 34175, 64008,
20261, 11201, 51799, 48043, 45645, 61163, 48375, 36447, 57042, 21218, 41100,
89951, 22745, 35851, 83326, 61125, 78298, 80752, 49858, 52940, 96976, 63792,
11376, 53582, 18717, 90226, 50530, 94203, 99447, 27670, 96577, 57856, 56372,
16165, 23427, 54561, 28806, 44439, 22926, 30123, 61451, 92397, 56979, 92309,
70873, 13355, 21801, 46346, 37562, 56458, 28286, 47306, 99555, 69399, 26234,
47546, 49661, 88601, 35943, 39936, 25632, 24611, 44166, 56648, 30379, 59785,
11110, 14329, 93815, 52226, 71381, 13842, 25612, 63294, 14664, 21077, 82626,
18799, 60915, 81020, 56447, 76619, 11433, 13414, 42548, 92713, 70467, 30884,
47484, 16072, 38936, 13036, 88376, 45539, 35901, 19506, 65690, 73957, 71850,
49231, 14276, 20005, 18384, 76615, 11635, 38177, 55607, 41369, 95447, 58581,
58149, 91946, 33790, 76232, 75692, 95464, 22246, 51061, 56692, 53121, 77209,
15482, 10688, 14868, 45907, 73520, 72666, 25734, 17959, 24677, 66446, 94627,
53535, 15560, 41967, 69297, 11929, 59403, 33283, 52232, 57350, 43933, 40921,
36635, 10827, 71286, 19736, 80619, 25251, 95042, 15526, 36496, 55854, 49124,
81980, 35375, 49157, 63512, 28944, 14946, 36503, 54010, 18767, 23969, 43905,
66979, 33113, 21286, 58471, 59080, 13395, 79144, 70373, 67031, 38360, 26705,
50906, 52406, 26066, 73146, 15884, 31897, 30045, 61068, 45550, 92454, 13376,
14354, 19770, 22928, 97790, 50723, 46081, 30202, 14410, 20223, 88500, 67298,
13261, 14172, 81410, 93578, 83583, 46047, 94167, 82564, 21156, 15799, 86709,
37931, 74703, 83103, 23054, 70470, 72008, 49247, 91911, 69998, 20961, 70070,
63197, 54853, 88191, 91830, 49521, 19454, 81450, 89091, 62378, 25683, 61869,
51744, 36580, 85778, 36871, 48121, 28810, 83712, 45486, 67393, 26935, 42393,
20132, 55349, 86057, 21309, 80218, 10094, 11357, 48819, 39734, 40758, 30432,
21204, 29467, 30214, 61024, 55307, 74621, 11622, 68908, 33032, 52868, 99194,
99900, 84936, 69036, 99149, 45013, 32895, 59004, 32322, 14933, 32936, 33562,
72550, 27385, 58049, 58200, 16808, 21360, 32961, 18586, 79307, 15492)(children:
StringSubstrColStartLen(col 9:string, start 0, length 5) -> 14:string),
SelectColumnIsNotNull(col 15:string)(children: StringSubstrColStartLen(col
14:string, start 0, length 2)(children: StringSubstrColStartLen(col 9:string,
start 0, length 5) -> 14:string) -> 15:string))
predicate: ((substr(ca_zip, 1, 5)) IN ('89436', '30868',
'65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502',
'32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362',
'87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055',
'18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819',
'40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223',
'82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425',
'32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249',
'48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936',
'34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497',
'84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067',
'62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022',
'49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793',
'25002', '27413', '39736', '47208', '16515', '94808',
'57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468',
'34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163',
'48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851',
'83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792',
'11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670',
'96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439',
'22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355',
'21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399',
'26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611',
'44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226',
'71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799',
'60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713',
'70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539',
'35901', '19506', '65690', '73957', '71850', '49231',
'14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369',
'95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464',
'22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868',
'45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627',
'53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232',
'57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619',
'25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375',
'49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969',
'43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144',
'70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146',
'15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354',
'19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223',
'88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047',
'94167', '82564', '21156', '15799', '86709', '37931', '74703',
'83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961',
'70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450',
'89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871',
'48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132',
'55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734',
'40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621',
'11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036',
'99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562',
'72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586',
'79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type:
boolean)
Statistics: Num rows: 20000000 Data size: 20297597642
Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: substr(ca_zip, 1, 5) (type: string)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [14]
- selectExpressions: StringSubstrColStartLen(col
9:string, start 0, length 5) -> 14:string
Statistics: Num rows: 20000000 Data size: 20297597642
Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 14:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -317,71 +271,30 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 20000000 Data size:
20297597642 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Map 9
Map Operator Tree:
TableScan
alias: customer_address
filterExpr: (ca_address_sk is not null and
substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col
15:string)(children: StringSubstrColStartLen(col 14:string, start 0, length
2)(children: StringSubstrColStartLen(col 9:string, start 0, length 5) ->
14:string) -> 15:string))
predicate: (ca_address_sk is not null and
substr(substr(ca_zip, 1, 5), 1, 2) is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284
Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_zip (type:
string)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 9]
Statistics: Num rows: 40000000 Data size: 40595195284
Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 40000000 Data size: 40595195284
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 10
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -405,53 +318,22 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Reducer 11
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 22000000 Data size: 22327357890 Basic
stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColGreaterLongScalar(col
1:bigint, val 10)
predicate: (_col1 > 10L) (type: boolean)
Statistics: Num rows: 7333333 Data size: 7442452291 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: substr(_col0, 1, 5) (type: string)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2]
- selectExpressions: StringSubstrColStartLen(col
0:string, start 0, length 5) -> 2:string
Statistics: Num rows: 7333333 Data size: 7442452291 Basic
stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 2:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -460,45 +342,19 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 7333333 Data size: 7442452291
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 12
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 3666666 Data size: 3721225638 Basic
stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -507,45 +363,19 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 13666666 Data size: 13870024459
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 7
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 10000000 Data size: 10148798821 Basic
stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -554,56 +384,27 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 13666666 Data size: 13870024459
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 8
Execution mode: vectorized
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 6833333 Data size: 6935012229 Basic
stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColEqualLongScalar(col
1:bigint, val 2)
predicate: (_col1 = 2L) (type: boolean)
Statistics: Num rows: 1 Data size: 1014 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 1014 Basic stats:
COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 substr(_col0, 1, 2) (type: string)
1 substr(_col2, 1, 2) (type: string)
@@ -618,22 +419,12 @@ STAGE PLANS:
alias: store
filterExpr: (s_store_sk is not null and substr(s_zip, 1, 2)
is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col
30:string)(children: StringSubstrColStartLen(col 25:string, start 0, length 2)
-> 30:string))
predicate: (s_store_sk is not null and substr(s_zip, 1, 2)
is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats:
COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_store_name (type:
string), s_zip (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 25]
Statistics: Num rows: 1704 Data size: 3256276 Basic
stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -641,32 +432,15 @@ STAGE PLANS:
keys:
0 substr(_col0, 1, 2) (type: string)
1 substr(_col2, 1, 2) (type: string)
- Map Join Vectorization:
- bigTableKeyExpressions:
StringSubstrColStartLen(col 25:string, start 0, length 2) -> 30:string
- className: VectorMapJoinInnerBigOnlyStringOperator
- native: true
- nativeConditionsMet:
hive.mapjoin.optimized.hashtable IS true,
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS
true
outputColumnNames: _col1, _col2
input vertices:
0 Reducer 8
Statistics: Num rows: 1874 Data size: 3581903 Basic
stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col1 (type: int)
1 _col1 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -684,93 +458,42 @@ STAGE PLANS:
alias: store_sales
filterExpr: (ss_sold_date_sk is not null and ss_store_sk is
not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic
stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int))
predicate: (ss_sold_date_sk is not null and ss_store_sk is
not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088
Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_store_sk
(type: int), ss_net_profit (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 7, 22]
Statistics: Num rows: 575995635 Data size: 50814502088
Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088
Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type:
decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 5
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_qoy = 1) and (d_year = 2002) and d_date_sk
is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats:
COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children:
FilterLongColEqualLongScalar(col 10:int, val 1),
FilterLongColEqualLongScalar(col 6:int, val 2002), SelectColumnIsNotNull(col
0:int))
predicate: ((d_qoy = 1) and (d_year = 2002) and d_date_sk
is not null) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic
stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 18262 Data size: 20435178 Basic
stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic
stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -805,23 +528,9 @@ STAGE PLANS:
value expressions: _col1 (type: decimal(17,2))
Reducer 3
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) ->
decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -829,41 +538,21 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 348477374 Data size: 30742775095 Basic
stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: decimal(17,2))
Reducer 4
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0
(type: decimal(17,2))
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 348477374 Data size: 30742775095 Basic
stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
Statistics: Num rows: 100 Data size: 8800 Basic stats:
COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 100 Data size: 8800 Basic stats:
COMPLETE Column stats: NONE
table:
input format:
org.apache.hadoop.mapred.SequenceFileInputFormat