http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query25.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out index 3d7f402..d15c8ce 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out @@ -94,9 +94,7 @@ select POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -104,7 +102,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 14 + Map 16 Map Operator Tree: TableScan alias: store @@ -125,65 +123,17 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: d3 - filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 12 - Map Operator Tree: - TableScan - alias: d2 - filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 374), Reducer 9 (PARTITION-LEVEL SORT, 374) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 806), Reducer 9 (PARTITION-LEVEL SORT, 806) - Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 806), Reducer 2 (PARTITION-LEVEL SORT, 806) + Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486) Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 374), Map 8 (PARTITION-LEVEL SORT, 374) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) #### A masked pattern was here #### Vertices: Map 1 @@ -209,6 +159,25 @@ STAGE PLANS: Map 11 Map Operator Tree: TableScan + alias: d3 + filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 12 + Map Operator Tree: + TableScan alias: store_returns filterExpr: (sr_customer_sk is not null and sr_item_sk is not null and sr_ticket_number is not null and sr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE @@ -219,26 +188,33 @@ STAGE PLANS: expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - input vertices: - 1 Map 12 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 13 + Map 14 + Map Operator Tree: + TableScan + alias: d2 + filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 15 Map Operator Tree: TableScan alias: item @@ -290,25 +266,45 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 10 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col8, _col9, _col10, _col11 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int) + sort order: +++ + Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -361,7 +357,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col5, _col12, _col20, _col25, _col26, _col28, _col29 input vertices: - 1 Map 14 + 1 Map 16 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col5), sum(_col20), sum(_col12) @@ -414,16 +410,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int), _col1 (type: int) - 1 _col1 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col8, _col9, _col10, _col11 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int) - sort order: +++ - Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query28.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query28.q.out b/ql/src/test/results/clientpositive/perf/spark/query28.q.out index b437829..21d55cb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query28.q.out @@ -112,15 +112,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 15) + Reducer 11 <- Map 10 (GROUP, 133) Reducer 12 <- Reducer 11 (GROUP, 1) - Reducer 14 <- Map 13 (GROUP, 15) + Reducer 14 <- Map 13 (GROUP, 133) Reducer 15 <- Reducer 14 (GROUP, 1) - Reducer 17 <- Map 16 (GROUP, 15) + Reducer 17 <- Map 16 (GROUP, 133) Reducer 18 <- Reducer 17 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 15) + Reducer 5 <- Map 4 (GROUP, 133) Reducer 6 <- Reducer 5 (GROUP, 1) - Reducer 8 <- Map 7 (GROUP, 15) + Reducer 8 <- Map 7 (GROUP, 133) Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -132,22 +132,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Map 13 @@ -158,22 +158,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Map 16 @@ -184,22 +184,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52) and ss_quantity BETWEEN 6 AND 10) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Map 4 @@ -210,22 +210,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Map 7 @@ -236,22 +236,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Reducer 11 @@ -262,7 +262,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -302,7 +302,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -342,7 +342,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -382,7 +382,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -422,7 +422,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -458,7 +458,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 15) + Reducer 2 <- Map 1 (GROUP, 133) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -470,22 +470,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Reducer 2 @@ -496,7 +496,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query29.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index a734710..a3b0610 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -92,8 +92,7 @@ select POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -101,7 +100,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 16 Map Operator Tree: TableScan alias: store @@ -122,41 +121,17 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 13 - Map Operator Tree: - TableScan - alias: d2 - filterExpr: (d_moy BETWEEN 4 AND 7 and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) + Reducer 10 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) Reducer 4 <- Reducer 3 (GROUP, 640) Reducer 5 <- Reducer 4 (SORT, 1) Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) + Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) #### A masked pattern was here #### Vertices: Map 1 @@ -211,28 +186,35 @@ STAGE PLANS: expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - input vertices: - 1 Map 13 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 14 Map Operator Tree: TableScan + alias: d2 + filterExpr: (d_moy BETWEEN 4 AND 7 and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 15 + Map Operator Tree: + TableScan alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE @@ -309,7 +291,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col5, _col10, _col11, _col13, _col18, _col19, _col21, _col22 input vertices: - 1 Map 15 + 1 Map 16 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: string), _col19 (type: string), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col21 (type: string), _col22 (type: string) @@ -321,6 +303,22 @@ STAGE PLANS: Map-reduce partition columns: _col14 (type: int), _col13 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col16 (type: int), _col21 (type: string), _col22 (type: string) + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int) Reducer 2 Reduce Operator Tree: Join Operator http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query34.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out index 88279a3..b00389b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -112,7 +112,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 567), Reducer 6 (PARTITION-LEVEL SORT, 567) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 551), Reducer 6 (PARTITION-LEVEL SORT, 551) Reducer 3 <- Reducer 2 (SORT, 1) Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) Reducer 6 <- Reducer 5 (GROUP, 529) @@ -166,16 +166,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -269,12 +269,12 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 BETWEEN 15 AND 20 (type: boolean) - Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: bigint) Stage: Stage-0 http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query38.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query38.q.out b/ql/src/test/results/clientpositive/perf/spark/query38.q.out index 0064177..e011869 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query38.q.out @@ -43,68 +43,47 @@ select count(*) from ( limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_date (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 873), Reducer 10 (PARTITION-LEVEL SORT, 873) + Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 369) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) + Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) + Reducer 18 <- Reducer 17 (GROUP PARTITION-LEVEL SORT, 186) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 2 (PARTITION-LEVEL SORT, 975) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 481) + Reducer 5 <- Reducer 12 (GROUP, 259), Reducer 18 (GROUP, 259), Reducer 4 (GROUP, 259) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 11 + Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 16 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -112,65 +91,19 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 10 <- Reducer 9 (GROUP PARTITION-LEVEL SORT, 369) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 706), Map 17 (PARTITION-LEVEL SORT, 706) - Reducer 15 <- Reducer 14 (GROUP PARTITION-LEVEL SORT, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Map 12 (PARTITION-LEVEL SORT, 975) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 481) - Reducer 4 <- Reducer 10 (GROUP, 259), Reducer 15 (GROUP, 259), Reducer 3 (GROUP, 259) - Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 873), Map 8 (PARTITION-LEVEL SORT, 873) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 6 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 12 + Map 14 Map Operator Tree: TableScan alias: customer @@ -190,7 +123,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 13 + Map 15 Map Operator Tree: TableScan alias: web_sales @@ -203,26 +136,34 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 16 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 19 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 17 + Map 20 Map Operator Tree: TableScan alias: customer @@ -242,7 +183,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: catalog_sales @@ -255,26 +196,50 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 11 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col6, _col7 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col7 (type: string), _col6 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -304,7 +269,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 14 + Reducer 16 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 17 Reduce Operator Tree: Join Operator condition map: @@ -324,7 +305,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reducer 15 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -360,6 +341,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col3, _col6, _col7 @@ -374,7 +371,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -404,7 +401,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -432,7 +429,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -450,26 +447,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col7 (type: string), _col6 (type: string), _col3 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/e74eb354/ql/src/test/results/clientpositive/perf/spark/query51.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query51.q.out b/ql/src/test/results/clientpositive/perf/spark/query51.q.out index 21afbe2..5651f8c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -87,17 +87,43 @@ order by item_sk limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 10 <- Reducer 9 (GROUP PARTITION-LEVEL SORT, 169) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 437) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 303), Reducer 3 (PARTITION-LEVEL SORT, 303) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 241) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 154), Map 8 (PARTITION-LEVEL SORT, 154) #### A masked pattern was here #### Vertices: - Map 6 + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Execution mode: vectorized + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -105,24 +131,19 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 9 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -130,68 +151,19 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 437) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 303), Reducer 8 (PARTITION-LEVEL SORT, 303) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 241) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 169) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 - input vertices: - 1 Map 6 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col1 (type: int), _col4 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 7 + Map 8 Map Operator Tree: TableScan alias: web_sales @@ -204,33 +176,75 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 - input vertices: - 1 Map 9 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col1 (type: int), _col4 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string, _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), sum_window_0 (type: decimal(27,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(27,2)) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col1 (type: int), _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: string) @@ -267,7 +281,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(27,2)) - Reducer 3 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -283,7 +297,7 @@ STAGE PLANS: Map-reduce partition columns: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: int) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: decimal(27,2)), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(27,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -329,7 +343,7 @@ STAGE PLANS: Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -346,44 +360,28 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 9 Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: int, _col1: string, _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), sum_window_0 (type: decimal(27,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(27,2)) + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col1 (type: int), _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) Stage: Stage-0 Fetch Operator