http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query5.q.out b/ql/src/test/results/clientpositive/perf/spark/query5.q.out index 68a7ded..afcd4e5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query5.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ssr as (select s_store_id, sum(sales_price) as sales, @@ -125,7 +125,7 @@ with ssr as ,id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ssr as (select s_store_id, sum(sales_price) as sales, @@ -252,10 +252,6 @@ with ssr as ,id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -273,40 +269,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -320,40 +294,18 @@ STAGE PLANS: alias: web_site filterExpr: web_site_sk is not null (type: boolean) Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: web_site_sk is not null (type: boolean) Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: web_site_sk (type: int), web_site_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -378,439 +330,199 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int)) predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_store_sk (type: int), ss_sold_date_sk (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [7, 0, 15, 22, 24, 25] - selectExpressions: ConstantVectorExpression(val 0) -> 24:decimal(7,2), ConstantVectorExpression(val 0) -> 25:decimal(7,2) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 633586785 Data size: 55276696920 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 13 Map Operator Tree: TableScan alias: catalog_returns filterExpr: (cr_returned_date_sk is not null and cr_catalog_page_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 12:int)) predicate: (cr_catalog_page_sk is not null and cr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cr_catalog_page_sk (type: int), cr_returned_date_sk (type: int), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)), cr_return_amount (type: decimal(7,2)), cr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [12, 0, 28, 29, 18, 26] - selectExpressions: ConstantVectorExpression(val 0) -> 28:decimal(7,2), ConstantVectorExpression(val 0) -> 29:decimal(7,2) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 316788717 Data size: 42056843632 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 14 Map Operator Tree: TableScan alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-08-03 17:00:00.0, right 1998-08-17 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 15 Map Operator Tree: TableScan alias: catalog_page filterExpr: cp_catalog_page_sk is not null (type: boolean) Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: cp_catalog_page_sk is not null (type: boolean) Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cp_catalog_page_sk (type: int), cp_catalog_page_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 16 Map Operator Tree: TableScan alias: web_sales filterExpr: (ws_sold_date_sk is not null and ws_web_site_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 13:int)) predicate: (ws_sold_date_sk is not null and ws_web_site_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_web_site_sk (type: int), ws_sold_date_sk (type: int), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [13, 0, 23, 33, 35, 36] - selectExpressions: ConstantVectorExpression(val 0) -> 35:decimal(7,2), ConstantVectorExpression(val 0) -> 36:decimal(7,2) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 302405606 Data size: 41118416712 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 19 Map Operator Tree: TableScan alias: web_sales filterExpr: (ws_web_site_sk is not null and ws_item_sk is not null and ws_order_number is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 13:int), SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 17:int)) predicate: (ws_item_sk is not null and ws_order_number is not null and ws_web_site_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_item_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [3, 13, 17] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 21 Map Operator Tree: TableScan alias: web_returns filterExpr: (wr_returned_date_sk is not null and wr_item_sk is not null and wr_order_number is not null) (type: boolean) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 13:int)) predicate: (wr_item_sk is not null and wr_order_number is not null and wr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: wr_returned_date_sk (type: int), wr_item_sk (type: int), wr_order_number (type: int), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2, 13, 15, 23] Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 6 Map Operator Tree: TableScan alias: store_returns filterExpr: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int)) predicate: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sr_store_sk (type: int), sr_returned_date_sk (type: int), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [7, 0, 21, 22, 11, 19] - selectExpressions: ConstantVectorExpression(val 0) -> 21:decimal(7,2), ConstantVectorExpression(val 0) -> 22:decimal(7,2) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 633586785 Data size: 55276696920 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 7 Map Operator Tree: TableScan alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-08-03 17:00:00.0, right 1998-08-17 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 9 Map Operator Tree: TableScan alias: catalog_sales filterExpr: (cs_sold_date_sk is not null and cs_catalog_page_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 12:int)) predicate: (cs_catalog_page_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_catalog_page_sk (type: int), cs_sold_date_sk (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [12, 0, 23, 33, 35, 36] - selectExpressions: ConstantVectorExpression(val 0) -> 35:decimal(7,2), ConstantVectorExpression(val 0) -> 36:decimal(7,2) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 316788717 Data size: 42056843632 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Reducer 10 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -827,11 +539,6 @@ STAGE PLANS: Statistics: Num rows: 348467596 Data size: 46262528997 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 11 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -855,23 +562,9 @@ STAGE PLANS: value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 12 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2, 3] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -879,22 +572,9 @@ STAGE PLANS: Select Operator expressions: 'catalog channel' (type: string), concat('catalog_page', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [5, 6, 1, 2, 7] - selectExpressions: ConstantVectorExpression(val catalog channel) -> 5:string, StringScalarConcatStringGroupCol(val catalog_page, col 0:string) -> 6:string, DecimalColSubtractDecimalColumn(col 3:decimal(17,2), col 4:decimal(17,2)) -> 7:decimal(18,2) Statistics: Num rows: 191657181 Data size: 25444391433 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(27,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(27,2), VectorUDAFSumDecimal(col 7:decimal(18,2)) -> decimal(28,2) - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:string, col 6:string, ConstantVectorExpression(val 0) -> 8:bigint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -903,21 +583,12 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 17 Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -951,23 +622,9 @@ STAGE PLANS: value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 18 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2, 3] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -975,22 +632,9 @@ STAGE PLANS: Select Operator expressions: 'web channel' (type: string), concat('web_site', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [5, 6, 1, 2, 7] - selectExpressions: ConstantVectorExpression(val web channel) -> 5:string, StringScalarConcatStringGroupCol(val web_site, col 0:string) -> 6:string, DecimalColSubtractDecimalColumn(col 3:decimal(17,2), col 4:decimal(17,2)) -> 7:decimal(18,2) Statistics: Num rows: 182955399 Data size: 24876643188 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(27,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(27,2), VectorUDAFSumDecimal(col 7:decimal(18,2)) -> decimal(28,2) - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:string, col 6:string, ConstantVectorExpression(val 0) -> 8:bigint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -999,21 +643,12 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 2 Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -1046,11 +681,6 @@ STAGE PLANS: Statistics: Num rows: 766640042 Data size: 66884806171 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 20 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -1069,23 +699,9 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2, 3] keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1093,22 +709,9 @@ STAGE PLANS: Select Operator expressions: 'store channel' (type: string), concat('store', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [5, 6, 1, 2, 7] - selectExpressions: ConstantVectorExpression(val store channel) -> 5:string, StringScalarConcatStringGroupCol(val store, col 0:string) -> 6:string, DecimalColSubtractDecimalColumn(col 3:decimal(17,2), col 4:decimal(17,2)) -> 7:decimal(18,2) Statistics: Num rows: 383320021 Data size: 33442403085 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(27,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(27,2), VectorUDAFSumDecimal(col 7:decimal(18,2)) -> decimal(28,2) - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:string, col 6:string, ConstantVectorExpression(val 0) -> 8:bigint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -1117,32 +720,14 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 4 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) - Group By Vectorization: - aggregators: VectorUDAFSumDecimal(col 3:decimal(27,2)) -> decimal(27,2), VectorUDAFSumDecimal(col 4:decimal(27,2)) -> decimal(27,2), VectorUDAFSumDecimal(col 5:decimal(28,2)) -> decimal(28,2) - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:bigint - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col3, _col4, _col5 @@ -1151,49 +736,25 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4] Statistics: Num rows: 1136898901 Data size: 125645156503 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1136898901 Data size: 125645156503 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(28,2)) Reducer 5 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(28,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4] Statistics: Num rows: 1136898901 Data size: 125645156503 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 100 Data size: 11000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 100 Data size: 11000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query50.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query50.q.out b/ql/src/test/results/clientpositive/perf/spark/query50.q.out index ac7ff3e..0c4528f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query50.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select s_store_name ,s_company_id @@ -56,7 +56,7 @@ order by s_store_name ,s_zip limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select s_store_name ,s_company_id @@ -114,10 +114,6 @@ order by s_store_name ,s_zip limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -134,40 +130,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_name (type: string), s_company_id (type: int), s_street_number (type: string), s_street_name (type: string), s_street_type (type: string), s_suite_number (type: string), s_city (type: string), s_county (type: string), s_state (type: string), s_zip (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 16, 18, 19, 20, 21, 22, 23, 24, 25] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col10 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -187,176 +161,79 @@ STAGE PLANS: alias: store_returns filterExpr: (sr_ticket_number is not null and sr_item_sk is not null and sr_customer_sk is not null and sr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int)) predicate: (sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2, 3, 9] Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 7 Map Operator Tree: TableScan alias: d2 filterExpr: ((d_year = 2000) and (d_moy = 9) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), FilterLongColEqualLongScalar(col 8:int, val 9), SelectColumnIsNotNull(col 0:int)) predicate: ((d_moy = 9) and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 8 Map Operator Tree: TableScan alias: store_sales filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 7:int), SelectColumnIsNotNull(col 0:int)) predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2, 3, 7, 9] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) sort order: +++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 9 Map Operator Tree: TableScan alias: d1 filterExpr: d_date_sk is not null (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: d_date_sk is not null (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Reducer 2 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -373,11 +250,6 @@ STAGE PLANS: Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reducer 3 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -396,11 +268,6 @@ STAGE PLANS: Reducer 4 Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -439,23 +306,9 @@ STAGE PLANS: value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) Reducer 5 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 10:bigint) -> bigint, VectorUDAFSumLong(col 11:bigint) -> bigint, VectorUDAFSumLong(col 12:bigint) -> bigint, VectorUDAFSumLong(col 13:bigint) -> bigint, VectorUDAFSumLong(col 14:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:int, col 2:string, col 3:string, col 4:string, col 5:string, col 6:string, col 7:string, col 8:string, col 9:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -463,41 +316,21 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) sort order: ++++++++++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) Reducer 6 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Limit Vectorization: - className: VectorLimitOperator - native: true Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat