Repository: hive Updated Branches: refs/heads/master 489b37a54 -> 142367d96
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query87.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query87.q.out b/ql/src/test/results/clientpositive/perf/spark/query87.q.out index e7329a3..8ac6dce 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query87.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query87.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select count(*) from ((select distinct c_last_name, c_first_name, d_date from store_sales, date_dim, customer @@ -19,7 +19,7 @@ from ((select distinct c_last_name, c_first_name, d_date and d_month_seq between 1212 and 1212+11) ) cool_cust PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select count(*) from ((select distinct c_last_name, c_first_name, d_date from store_sales, date_dim, customer @@ -40,10 +40,6 @@ from ((select distinct c_last_name, c_first_name, d_date and d_month_seq between 1212 and 1212+11) ) cool_cust POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -62,40 +58,18 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -109,40 +83,18 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -156,40 +108,18 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work @@ -213,22 +143,12 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int)) predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -236,10 +156,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col3 input vertices: 1 Map 7 @@ -248,22 +164,9 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 13 @@ -272,65 +175,32 @@ STAGE PLANS: alias: customer filterExpr: c_customer_sk is not null (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: c_customer_sk is not null (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 8, 9] Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Map 14 Map Operator Tree: TableScan alias: web_sales filterExpr: (ws_sold_date_sk is not null and ws_bill_customer_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int)) predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -338,10 +208,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col3 input vertices: 1 Map 17 @@ -350,22 +216,9 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Map 9 @@ -374,22 +227,12 @@ STAGE PLANS: alias: catalog_sales filterExpr: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int)) predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -397,10 +240,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerLongOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col1, _col3 input vertices: 1 Map 12 @@ -409,30 +248,12 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true Local Work: Map Reduce Local Work Reducer 10 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -454,21 +275,8 @@ STAGE PLANS: Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reducer 11 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -476,21 +284,9 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0] keys: _col1 (type: string), _col0 (type: string), _col2 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 @@ -498,31 +294,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), 1L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 4, 3] - selectExpressions: ConstantVectorExpression(val 1) -> 4:bigint Statistics: Num rows: 87116929 Data size: 11797382219 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 5] - selectExpressions: LongColMultiplyLongColumn(col 4:bigint, col 3:bigint) -> 5:bigint Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3), sum(_col4) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumLong(col 5:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -531,18 +309,9 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 15 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -564,21 +333,8 @@ STAGE PLANS: Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reducer 16 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -586,21 +342,9 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0] keys: _col1 (type: string), _col0 (type: string), _col2 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 @@ -608,31 +352,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), 1L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 4, 3] - selectExpressions: ConstantVectorExpression(val 1) -> 4:bigint Statistics: Num rows: 43560808 Data size: 5923010113 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 5] - selectExpressions: LongColMultiplyLongColumn(col 4:bigint, col 3:bigint) -> 5:bigint Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3), sum(_col4) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumLong(col 5:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -641,18 +367,9 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 2 - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Tagging not supported - vectorized: false Reduce Operator Tree: Join Operator condition map: @@ -674,21 +391,8 @@ STAGE PLANS: Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 @@ -696,21 +400,9 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 0, 2] Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0] keys: _col1 (type: string), _col0 (type: string), _col2 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 @@ -718,31 +410,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), 2L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 4, 3] - selectExpressions: ConstantVectorExpression(val 2) -> 4:bigint Statistics: Num rows: 174238687 Data size: 15371387547 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 5] - selectExpressions: LongColMultiplyLongColumn(col 4:bigint, col 3:bigint) -> 5:bigint Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3), sum(_col4) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumLong(col 5:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -751,60 +425,26 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 4 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumLong(col 4:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 130677808 Data size: 13584384883 Basic stats: COMPLETE Column stats: NONE Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColEqualLongColumn(col 5:bigint, col 4:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 2) -> 5:bigint)) predicate: (((_col3 * 2) = _col4) and (_col3 > 0L)) (type: boolean) Statistics: Num rows: 21779634 Data size: 2264064077 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 21779634 Data size: 2264064077 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 @@ -812,31 +452,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), 2L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 4, 3] - selectExpressions: ConstantVectorExpression(val 2) -> 4:bigint Statistics: Num rows: 10889817 Data size: 1132032038 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 5] - selectExpressions: LongColMultiplyLongColumn(col 4:bigint, col 3:bigint) -> 5:bigint Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3), sum(_col4) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumLong(col 5:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -845,31 +467,13 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 5 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumLong(col 4:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string, col 1:string, col 2:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -877,70 +481,31 @@ STAGE PLANS: Select Operator expressions: _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [3, 4] Statistics: Num rows: 27225312 Data size: 3527521010 Basic stats: COMPLETE Column stats: NONE Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColEqualLongColumn(col 5:bigint, col 4:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 2) -> 5:bigint)) predicate: (((_col3 * 2) = _col4) and (_col3 > 0L)) (type: boolean) Statistics: Num rows: 4537552 Data size: 587920168 Basic stats: COMPLETE Column stats: NONE Select Operator - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [] Statistics: Num rows: 4537552 Data size: 587920168 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 6 Execution mode: vectorized - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - native: false - vectorProcessingMode: GLOBAL - projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat