http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query24.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index 5013da9..a34d3e8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -1,5 +1,5 @@ Warning: Map Join MAPJOIN[104][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ssales as (select c_last_name ,c_first_name @@ -48,7 +48,7 @@ group by c_last_name having sum(netpaid) > (select 0.05*avg(netpaid) from ssales) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ssales as (select c_last_name ,c_first_name @@ -97,10 +97,6 @@ group by c_last_name having sum(netpaid) > (select 0.05*avg(netpaid) from ssales) POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 @@ -119,40 +115,18 @@ STAGE PLANS: alias: store filterExpr: ((s_market_id = 7) and s_store_sk is not null and s_zip is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 10:int, val 7), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 25:string)) predicate: ((s_market_id = 7) and s_store_sk is not null and s_zip is not null) (type: boolean) Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_name (type: string), s_state (type: string), s_zip (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 24, 25] Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -173,22 +147,12 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 7:int), SelectColumnIsNotNull(col 3:int)) predicate: 
(ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 7, 9, 13] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -196,10 +160,6 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9 input vertices: 1 Map 19 @@ -208,22 +168,9 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 20 @@ -232,178 +179,80 @@ STAGE PLANS: alias: customer filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 14:string)) predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 8, 9, 14] Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: 
_col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 21 Map Operator Tree: TableScan alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_size (type: string), i_color (type: string), i_units (type: string), i_manager_id (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 15, 17, 18, 20] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 22 Map Operator Tree: TableScan alias: store_returns filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 2:int)) predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: 
COMPLETE Column stats: NONE Select Operator expressions: sr_item_sk (type: int), sr_ticket_number (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 9] Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 23 Map Operator Tree: TableScan alias: customer_address filterExpr: (upper(ca_country) is not null and ca_zip is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 14:string)(children: StringUpper(col 10:string) -> 14:string), SelectColumnIsNotNull(col 9:string)) predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [8, 9, 10] Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), upper(_col2) (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyExpressions: StringUpper(col 10:string) -> 14:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Reducer 13 - Reduce Vectorization: - enabled: true - enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -420,11 +269,6 @@ STAGE PLANS: Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) Reducer 14 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -441,11 +285,6 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) Reducer 15 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -462,11 +301,6 @@ STAGE PLANS: Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) Reducer 16 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -490,23 +324,9 @@ STAGE PLANS: value expressions: _col10 (type: decimal(17,2)) Reducer 17 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 10:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:decimal(7,2), col 5:string, col 6:string, col 7:string, col 8:int, col 9:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int), KEY._col9 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -514,67 +334,31 @@ STAGE PLANS: Select Operator expressions: _col10 (type: decimal(17,2)) 
outputColumnNames: _col10 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [10] Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col10), count(_col10) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 10:decimal(17,2)) -> decimal(27,2), VectorUDAFCount(col 10:decimal(17,2)) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(27,2)), _col1 (type: bigint) Reducer 18 Execution mode: vectorized Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 0:decimal(27,2)) -> decimal(27,2), VectorUDAFCountMerge(col 1:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - native: false - vectorProcessingMode: GLOBAL - projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (0.05 * (_col0 / _col1)) (type: decimal(38,12)) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [4] - selectExpressions: DecimalScalarMultiplyDecimalColumn(val 0.05, col 3:decimal(38,13))(children: DecimalColDivideDecimalColumn(col 0:decimal(27,2), col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 2:decimal(19,0)) -> 3:decimal(38,13)) -> 4:decimal(38,12) Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 1 @@ -589,40 +373,18 @@ STAGE PLANS: alias: store filterExpr: ((s_market_id = 7) and s_store_sk is not null and s_zip is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 10:int, val 7), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 25:string)) predicate: ((s_market_id = 7) and s_store_sk is not null and s_zip is not null) (type: boolean) Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_name (type: string), s_state 
(type: string), s_zip (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 24, 25] Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -642,223 +404,102 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 7:int), SelectColumnIsNotNull(col 3:int)) predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 3, 7, 9, 13] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 10 Map Operator Tree: TableScan alias: store_returns filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: 
FilterExprAndExpr(children: SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 2:int)) predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sr_item_sk (type: int), sr_ticket_number (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 9] Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 11 Map Operator Tree: TableScan alias: customer_address filterExpr: (upper(ca_country) is not null and ca_zip is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 14:string)(children: StringUpper(col 10:string) -> 14:string), SelectColumnIsNotNull(col 9:string)) predicate: (ca_zip is not null and upper(ca_country) is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_state (type: string), ca_zip (type: string), ca_country (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [8, 9, 10] Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), upper(_col2) (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyExpressions: StringUpper(col 10:string) -> 14:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: 
[DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 7 Map Operator Tree: TableScan alias: item filterExpr: ((i_color = 'orchid') and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 17:string, val orchid), SelectColumnIsNotNull(col 0:int)) predicate: ((i_color = 'orchid') and i_item_sk is not null) (type: boolean) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_size (type: string), i_units (type: string), i_manager_id (type: int) outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 15, 18, 20] Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 9 Map Operator Tree: TableScan alias: customer filterExpr: (c_customer_sk is not null and c_birth_country is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 14:string)) predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 8, 9, 14] Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -885,11 +526,6 @@ STAGE PLANS: Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string) Reducer 3 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -906,11 +542,6 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) Reducer 4 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -927,11 +558,6 @@ STAGE PLANS: Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col17 (type: string), _col18 (type: string) Reducer 5 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -957,23 +583,9 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - Group By Vectorization: - aggregators: 
VectorUDAFSumDecimal(col 9:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:decimal(7,2), col 5:string, col 6:string, col 7:int, col 8:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: int), KEY._col8 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 @@ -981,21 +593,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col9 (type: decimal(17,2)) outputColumnNames: _col1, _col2, _col7, _col9 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 9] Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col9) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 9:decimal(17,2)) -> decimal(27,2) - className: VectorGroupByOperator - groupByMode: COMPLETE - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0] keys: _col1 (type: string), _col2 (type: string), _col7 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 @@ -1003,10 +603,6 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -1014,34 +610,19 @@ STAGE PLANS: keys: 0 1 - Map Join Vectorization: - className: VectorMapJoinInnerMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: 1 Reducer 18 Statistics: Num rows: 231911707 Data size: 74494745865 Basic stats: COMPLETE Column stats: NONE Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterDecimalColGreaterDecimalColumn(col 3:decimal(27,2), col 4:decimal(38,12)) predicate: (_col3 > _col4) (type: boolean) Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0, 2, 3] Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 77303902 Data size: 24831581847 Basic 
stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query25.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out index 38c1152..3d7f402 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_id ,i_item_desc @@ -45,7 +45,7 @@ select ,s_store_name limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_id ,i_item_desc @@ -92,10 +92,6 @@ select ,s_store_name limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -114,40 +110,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 5] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -161,40 +135,18 @@ STAGE PLANS: alias: d3 filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 8:int, left 4, right 10), FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: 
VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -208,40 +160,18 @@ STAGE PLANS: alias: d2 filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 8:int, left 4, right 10), FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -262,65 +192,32 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int)) predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2, 3, 7, 9, 22] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + 
Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 11 Map Operator Tree: TableScan alias: store_returns filterExpr: (sr_customer_sk is not null and sr_item_sk is not null and sr_ticket_number is not null and sr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 0:int)) predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2, 3, 9, 19] Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -328,10 +225,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerBigOnlyLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col2, _col3, _col4 input vertices: 1 Map 12 @@ -340,22 +233,9 @@ STAGE PLANS: key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: 
vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 13 @@ -364,107 +244,51 @@ STAGE PLANS: alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 4] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 7 Map Operator Tree: TableScan alias: d1 filterExpr: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 8:int, val 4), FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, 
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 8 Map Operator Tree: TableScan alias: catalog_sales filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int)) predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 15, 33] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -472,10 +296,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerBigOnlyLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 10 @@ -484,30 +304,12 @@ STAGE PLANS: key expressions: _col2 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col2 (type: int), _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Reducer 2 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: 
Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -524,11 +326,6 @@ STAGE PLANS: Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col5 (type: decimal(7,2)) Reducer 3 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -547,11 +344,6 @@ STAGE PLANS: Reducer 4 Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -586,23 +378,9 @@ STAGE PLANS: value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 5:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 6:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -610,52 +388,27 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) sort order: ++++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 6 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: 
true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map:
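Both golden-file diffs above capture the same underlying change: the test queries were switched from EXPLAIN VECTORIZATION EXPRESSION to plain EXPLAIN, so the PLAN VECTORIZATION header and the per-operator Map/Reduce Vectorization details no longer appear in the expected output. The sketch below is only an illustration of the two statement forms in HiveQL; the table and column names (example_table, col1, col2) are placeholders and are not taken from the TPC-DS queries in this commit.

-- Plain EXPLAIN: prints the stage plan, operator tree and statistics only,
-- matching the "+" side of the golden files above.
EXPLAIN
SELECT col1, SUM(col2)
FROM example_table
GROUP BY col1;

-- EXPLAIN VECTORIZATION EXPRESSION: additionally prints the PLAN VECTORIZATION
-- section and per-operator vectorization details (e.g. VectorFilterOperator,
-- predicateExpression, projectedOutputColumnNums), matching the "-" side
-- removed by this diff.
EXPLAIN VECTORIZATION EXPRESSION
SELECT col1, SUM(col2)
FROM example_table
GROUP BY col1;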