http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query37.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query37.q.out b/ql/src/test/results/clientpositive/perf/spark/query37.q.out index bce0d68..fa25d4c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query37.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query37.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select i_item_id ,i_item_desc ,i_current_price @@ -14,7 +14,7 @@ select i_item_id order by i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select i_item_id ,i_item_desc ,i_current_price @@ -30,6 +30,10 @@ select i_item_id order by i_item_id limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -46,18 +50,40 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 2001-06-01 17:00:00.0, right 2001-07-31 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -75,51 +101,107 @@ STAGE PLANS: alias: catalog_sales filterExpr: cs_item_sk is not null (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 15:int) predicate: cs_item_sk is not null (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_item_sk (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [15] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 5 Map Operator Tree: TableScan alias: item filterExpr: ((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52 and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnInList(col 13:int, values [678, 964, 918, 849]), FilterDecimal64ColumnBetween(col 5:decimal(7,2)/DECIMAL_64, decimal64LeftVal 2200, decimalLeftVal 2200, decimal64RightVal 5200, decimalRightVal 5200), SelectColumnIsNotNull(col 0:int)) predicate: ((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52 and i_item_sk is not null) (type: boolean) Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 4, 5] Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 6 Map Operator Tree: TableScan alias: inventory filterExpr: (inv_quantity_on_hand BETWEEN 100 AND 500 and inv_item_sk is not null and inv_date_sk is not null) (type: boolean) Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 100, right 500), SelectColumnIsNotNull(col 1:int), SelectColumnIsNotNull(col 0:int)) predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_quantity_on_hand BETWEEN 100 AND 500) (type: boolean) Statistics: Num rows: 4176000 Data size: 65980122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: inv_date_sk (type: int), inv_item_sk (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 4176000 Data size: 65980122 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -127,6 +209,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1 input vertices: 1 Map 7 @@ -135,11 +221,29 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -164,8 +268,21 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:decimal(7,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -173,21 +290,41 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: decimal(7,2)) Reducer 4 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query38.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query38.q.out b/ql/src/test/results/clientpositive/perf/spark/query38.q.out index 0064177..01b3c59 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query38.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain +PREHOOK: query: explain vectorization expression select count(*) from ( select distinct c_last_name, c_first_name, d_date from store_sales, date_dim, customer @@ -20,7 +20,7 @@ select count(*) from ( ) hot_cust limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization expression select count(*) from ( select distinct c_last_name, c_first_name, d_date from store_sales, date_dim, customer @@ -42,6 +42,10 @@ select count(*) from ( ) hot_cust limit 100 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -60,18 +64,40 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -85,18 +111,40 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -110,18 +158,40 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -144,12 +214,22 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int)) predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -157,6 +237,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col3 input vertices: 1 Map 6 @@ -165,9 +249,22 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 12 @@ -176,32 +273,65 @@ STAGE PLANS: alias: customer filterExpr: c_customer_sk is not null (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: c_customer_sk is not null (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 8, 9] Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 13 Map Operator Tree: TableScan alias: web_sales filterExpr: (ws_sold_date_sk is not null and ws_bill_customer_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int)) predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -209,6 +339,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col3 input vertices: 1 Map 16 @@ -217,9 +351,22 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Map 17 @@ -228,32 +375,65 @@ STAGE PLANS: alias: customer filterExpr: c_customer_sk is not null (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: c_customer_sk is not null (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 8, 9] Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Map 8 Map Operator Tree: TableScan alias: catalog_sales filterExpr: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int)) predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -261,6 +441,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col3 input vertices: 1 Map 11 @@ -269,15 +453,41 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work Reducer 10 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -285,15 +495,35 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] keys: _col1 (type: string), _col0 (type: string), _col2 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 87116929 Data size: 11797382219 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col3) + Group By Vectorization: + aggregators: VectorUDAFCount(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -302,9 +532,18 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 14 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -326,8 +565,21 @@ STAGE PLANS: Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reducer 15 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -335,15 +587,35 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] keys: _col1 (type: string), _col0 (type: string), _col2 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 43560808 Data size: 5923010113 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col3) + Group By Vectorization: + aggregators: VectorUDAFCount(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -352,9 +624,18 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -376,8 +657,21 @@ STAGE PLANS: Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -385,15 +679,35 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: COMPLETE + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: STREAMING + projectedOutputColumnNums: [0] keys: _col1 (type: string), _col0 (type: string), _col2 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col3) + Group By Vectorization: + aggregators: VectorUDAFCount(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -402,13 +716,31 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 4 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:string, col 2:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -416,41 +748,88 @@ STAGE PLANS: Select Operator expressions: _col3 (type: bigint) outputColumnNames: _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3] Statistics: Num rows: 152458212 Data size: 16545889939 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColEqualLongScalar(col 3:bigint, val 3) predicate: (_col3 = 3L) (type: boolean) Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Tagging not supported + vectorized: false Reduce Operator Tree: Join Operator condition map: