http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query78.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query78.q.out b/ql/src/test/results/clientpositive/perf/spark/query78.q.out index 720f654..15c7f04 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query78.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ws as (select d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, @@ -55,7 +55,7 @@ order by round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ws as (select d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, @@ -112,10 +112,6 @@ order by round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -144,321 +140,144 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 10 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 13 Map Operator Tree: TableScan alias: web_sales filterExpr: ws_sold_date_sk is not null (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: ws_sold_date_sk is not null (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_customer_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_wholesale_cost (type: decimal(7,2)), ws_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 4, 17, 18, 19, 21] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 15 Map Operator Tree: TableScan alias: web_returns Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Select Operator expressions: wr_item_sk (type: int), wr_order_number (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 13] Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 19 Map Operator Tree: TableScan alias: catalog_sales filterExpr: ((cs_item_sk = cs_item_sk) and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 15:int, col 15:int), SelectColumnIsNotNull(col 0:int)) predicate: ((cs_item_sk = cs_item_sk) and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_wholesale_cost (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 15, 17, 18, 19, 21] Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 21 Map Operator Tree: TableScan alias: catalog_returns Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Select Operator expressions: cr_item_sk (type: int), cr_order_number (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 16] Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 7 Map Operator Tree: TableScan alias: store_sales filterExpr: ss_sold_date_sk is not null (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: ss_sold_date_sk is not null (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_wholesale_cost (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2, 3, 9, 10, 11, 13] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 9 Map Operator Tree: TableScan alias: store_returns Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Select Operator expressions: sr_item_sk (type: int), sr_ticket_number (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 9] Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Reducer 11 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -482,23 +301,9 @@ STAGE PLANS: value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 12 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:int, col 1:int - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -506,27 +311,14 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0, 2, 3, 4] Statistics: Num rows: 43560808 Data size: 5923010147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 43560808 Data size: 5923010147 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 14 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -550,11 +342,6 @@ STAGE PLANS: Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 17 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -578,23 +365,9 @@ STAGE PLANS: value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 18 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:int, col 1:int - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -602,27 +375,14 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2, 3, 4] Statistics: Num rows: 43558464 Data size: 5898691072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 43558464 Data size: 5898691072 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) Reducer 2 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -645,11 +405,6 @@ STAGE PLANS: Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 20 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -674,23 +429,9 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:int, col 1:int - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -698,27 +439,14 @@ STAGE PLANS: Select Operator expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0, 2, 3, 4] Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 4 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -738,11 +466,6 @@ STAGE PLANS: Statistics: Num rows: 63887519 Data size: 5636175497 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col7 (type: bigint), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) Reducer 5 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -766,44 +489,22 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 6 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: 2000 (type: int), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey3 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(17,2)), KEY.reducesinkkey5 (type: bigint), KEY.reducesinkkey6 (type: decimal(18,2)), KEY.reducesinkkey7 (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [9, 0, 1, 8, 2, 3, 4, 5, 6, 7] - selectExpressions: ConstantVectorExpression(val 2000) -> 9:int Statistics: Num rows: 23425424 Data size: 2066597727 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 8 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map:
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query79.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query79.q.out b/ql/src/test/results/clientpositive/perf/spark/query79.q.out index 4a7c8ba..a83090f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query79.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit from @@ -20,7 +20,7 @@ select order by c_last_name,c_first_name,substr(s_city,1,30), profit limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit from @@ -42,10 +42,6 @@ select order by c_last_name,c_first_name,substr(s_city,1,30), profit limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -62,40 +58,18 @@ STAGE PLANS: alias: store filterExpr: (s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 6:int, left 200, right 295), SelectColumnIsNotNull(col 0:int)) predicate: (s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean) Statistics: Num rows: 189 Data size: 361171 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_city (type: string) outputColumnNames: _col0, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 22] Statistics: Num rows: 189 Data size: 361171 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 9 @@ -104,40 +78,18 @@ STAGE PLANS: alias: household_demographics filterExpr: (((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 3:int, val 8), FilterLongColGreaterLongScalar(col 4:int, val 0)), SelectColumnIsNotNull(col 0:int)) predicate: (((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 6000 Data size: 642000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 6000 Data size: 642000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -156,134 +108,60 @@ STAGE PLANS: alias: customer filterExpr: c_customer_sk is not null (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: c_customer_sk is not null (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 8, 9] Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 4 Map Operator Tree: TableScan alias: store_sales filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_hdemo_sk is not null and ss_customer_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int), SelectColumnIsNotNull(col 5:int), SelectColumnIsNotNull(col 3:int)) predicate: (ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_coupon_amt (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 5, 6, 7, 9, 19, 22] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 7 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year) IN (1998, 1999, 2000) and (d_dow = 1) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColumnInList(col 6:int, values [1998, 1999, 2000]), FilterLongColEqualLongScalar(col 7:int, val 1), SelectColumnIsNotNull(col 0:int)) predicate: ((d_dow = 1) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -305,32 +183,16 @@ STAGE PLANS: value expressions: _col3 (type: int), _col4 (type: decimal(17,2)) Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey3 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 4, 5, 3] Statistics: Num rows: 421657640 Data size: 37198759428 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -339,11 +201,6 @@ STAGE PLANS: Reducer 5 Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -387,23 +244,9 @@ STAGE PLANS: value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) Reducer 6 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 5:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:int, col 1:int, col 2:int, col 3:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -411,19 +254,11 @@ STAGE PLANS: Select Operator expressions: _col2 (type: int), _col0 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [2, 0, 3, 4, 5] Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2))