http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query58.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query58.q.out b/ql/src/test/results/clientpositive/perf/spark/query58.q.out index eb5dffe..f06cbef 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query58.q.out @@ -1,6 +1,6 @@ -Warning: Map Join MAPJOIN[180][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[181][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[182][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[183][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[184][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[185][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with ss_items as (select i_item_id item_id @@ -140,10 +140,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 12 <- Map 11 (GROUP, 1) + Reducer 11 <- Map 10 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 11 + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -162,7 +162,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 12 + Reducer 11 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -184,10 +184,10 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 25 <- Map 24 (GROUP, 1) + Reducer 24 <- Map 23 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 24 + Map 23 Map Operator Tree: TableScan alias: date_dim @@ -206,7 +206,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 25 + Reducer 24 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -228,10 +228,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 38 <- Map 37 (GROUP, 1) + Reducer 37 <- Map 36 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 37 + Map 36 Map Operator Tree: TableScan alias: date_dim @@ -250,7 +250,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 38 + Reducer 37 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -272,47 +272,76 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 9 (GROUP, 2) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2) - Reducer 18 <- Map 27 (PARTITION-LEVEL SORT, 398), Reducer 17 (PARTITION-LEVEL SORT, 398) - Reducer 19 <- Map 28 (PARTITION-LEVEL SORT, 442), Reducer 18 (PARTITION-LEVEL SORT, 442) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) - Reducer 20 <- Reducer 19 (GROUP, 481) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Map 26 (PARTITION-LEVEL SORT, 2) - Reducer 23 <- Reducer 22 (GROUP, 2) - Reducer 3 <- Map 14 (PARTITION-LEVEL SORT, 306), Reducer 2 (PARTITION-LEVEL SORT, 306) - Reducer 30 <- Map 29 (PARTITION-LEVEL SORT, 2), Reducer 36 (PARTITION-LEVEL SORT, 2) - Reducer 31 <- Map 40 (PARTITION-LEVEL SORT, 154), Reducer 30 (PARTITION-LEVEL SORT, 154) - Reducer 32 <- Map 41 (PARTITION-LEVEL SORT, 174), Reducer 31 (PARTITION-LEVEL SORT, 174) - Reducer 33 <- Reducer 32 (GROUP, 186) - Reducer 35 <- Map 34 (PARTITION-LEVEL SORT, 2), Map 39 (PARTITION-LEVEL SORT, 2) - Reducer 36 <- Reducer 35 (GROUP, 2) - Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 341), Reducer 3 (PARTITION-LEVEL SORT, 341) - Reducer 5 <- Reducer 4 (GROUP, 369) - Reducer 6 <- Reducer 20 (PARTITION-LEVEL SORT, 518), Reducer 33 (PARTITION-LEVEL SORT, 518), Reducer 5 (PARTITION-LEVEL SORT, 518) - Reducer 7 <- Reducer 6 (SORT, 1) - Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 15 (PARTITION-LEVEL SORT, 2) + Reducer 14 <- Reducer 13 (GROUP, 2) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 403), Map 20 (PARTITION-LEVEL SORT, 403) + Reducer 18 <- Reducer 17 (PARTITION-LEVEL SORT, 438), Reducer 22 (PARTITION-LEVEL SORT, 438) + Reducer 19 <- Reducer 18 (GROUP, 481) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 310), Map 7 (PARTITION-LEVEL SORT, 310) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Reducer 27 (PARTITION-LEVEL SORT, 2) + Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 2), Map 28 (PARTITION-LEVEL SORT, 2) + Reducer 27 <- Reducer 26 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 336), Reducer 9 (PARTITION-LEVEL SORT, 336) + Reducer 30 <- Map 29 (PARTITION-LEVEL SORT, 159), Map 33 (PARTITION-LEVEL SORT, 159) + Reducer 31 <- Reducer 30 (PARTITION-LEVEL SORT, 169), Reducer 35 (PARTITION-LEVEL SORT, 169) + Reducer 32 <- Reducer 31 (GROUP, 186) + Reducer 35 <- Map 34 (PARTITION-LEVEL SORT, 2), Reducer 40 (PARTITION-LEVEL SORT, 2) + Reducer 39 <- Map 38 (PARTITION-LEVEL SORT, 2), Map 41 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 369) + Reducer 40 <- Reducer 39 (GROUP, 2) + Reducer 5 <- Reducer 19 (PARTITION-LEVEL SORT, 518), Reducer 32 (PARTITION-LEVEL SORT, 518), Reducer 4 (PARTITION-LEVEL SORT, 518) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date is not null and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Map 13 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + Map 12 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + input vertices: + 0 Reducer 11 + Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -330,25 +359,25 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 14 + Map 16 Map Operator Tree: TableScan - alias: catalog_sales - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Map 15 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + Map 20 Map Operator Tree: TableScan alias: item @@ -366,7 +395,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 16 + Map 21 Map Operator Tree: TableScan alias: date_dim @@ -384,7 +413,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Map 21 + Map 25 Map Operator Tree: TableScan alias: date_dim @@ -402,18 +431,18 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0 + outputColumnNames: _col1 input vertices: - 1 Reducer 25 + 0 Reducer 24 Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 26 + Map 28 Map Operator Tree: TableScan alias: date_dim @@ -431,25 +460,25 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 27 + Map 29 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Map 28 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) + Map 33 Map Operator Tree: TableScan alias: item @@ -467,7 +496,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 29 + Map 34 Map Operator Tree: TableScan alias: date_dim @@ -485,7 +514,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Map 34 + Map 38 Map Operator Tree: TableScan alias: date_dim @@ -503,18 +532,18 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0 + outputColumnNames: _col1 input vertices: - 1 Reducer 38 + 0 Reducer 37 Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 39 + Map 41 Map Operator Tree: TableScan alias: date_dim @@ -532,25 +561,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 40 - Map Operator Tree: - TableScan - alias: web_sales - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Map 41 + Map 7 Map Operator Tree: TableScan alias: item @@ -574,30 +585,39 @@ STAGE PLANS: alias: date_dim Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_date = '1998-02-19') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date is not null and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - input vertices: - 1 Reducer 12 - Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reducer 10 + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 14 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -615,15 +635,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) Reducer 18 Reduce Operator Tree: Join Operator @@ -632,27 +653,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)) - Reducer 19 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7 + outputColumnNames: _col2, _col4 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) + aggregations: sum(_col2) + keys: _col4 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE @@ -662,12 +667,42 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) + Reducer 19 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) + Reducer 22 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 @@ -677,27 +712,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 20 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 22 + Reducer 26 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col2 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE @@ -711,7 +732,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 23 + Reducer 27 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -731,29 +752,36 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) Reducer 30 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: string) Reducer 31 Reduce Operator Tree: Join Operator @@ -762,27 +790,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)) - Reducer 32 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7 + outputColumnNames: _col2, _col4 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) + aggregations: sum(_col2) + keys: _col4 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE @@ -792,7 +804,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 33 + Reducer 32 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -812,7 +824,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 39 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col2 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE @@ -826,55 +853,33 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 36 + Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 5 + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reducer 40 Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 6 + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -899,7 +904,7 @@ STAGE PLANS: Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 7 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) @@ -921,20 +926,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2 + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query6.q.out b/ql/src/test/results/clientpositive/perf/spark/query6.q.out index 8090c8a..954bacb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query6.q.out @@ -58,11 +58,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 18 <- Map 17 (GROUP, 2) + Reducer 19 <- Reducer 18 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 17 Map Operator Tree: TableScan alias: date_dim @@ -84,7 +84,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reducer 6 + Reducer 18 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -102,7 +102,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 7 + Reducer 19 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -124,71 +124,21 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 439), Reducer 9 (PARTITION-LEVEL SORT, 439) - Reducer 11 <- Map 20 (PARTITION-LEVEL SORT, 1009), Reducer 10 (PARTITION-LEVEL SORT, 1009) - Reducer 13 <- Map 12 (GROUP, 6) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 2), Reducer 18 (PARTITION-LEVEL SORT, 2) - Reducer 16 <- Map 19 (PARTITION-LEVEL SORT, 398), Reducer 15 (PARTITION-LEVEL SORT, 398) - Reducer 18 <- Map 17 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 849), Reducer 11 (PARTITION-LEVEL SORT, 849) - Reducer 3 <- Reducer 2 (GROUP, 582) - Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 8), Reducer 13 (PARTITION-LEVEL SORT, 8) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 855), Map 13 (PARTITION-LEVEL SORT, 855) + Reducer 15 <- Map 14 (GROUP, 6) + Reducer 16 <- Map 20 (PARTITION-LEVEL SORT, 8), Reducer 15 (PARTITION-LEVEL SORT, 8) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 398), Reducer 2 (PARTITION-LEVEL SORT, 398) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Reducer 16 (PARTITION-LEVEL SORT, 483), Reducer 4 (PARTITION-LEVEL SORT, 483) + Reducer 6 <- Reducer 5 (GROUP, 529) + Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 9 <- Map 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 7 - Statistics: Num rows: 40000000 Data size: 40955195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40955195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Map 12 - Map Operator Tree: - TableScan - alias: j - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_category is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(i_current_price), count(i_current_price) - keys: i_category (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) - Map 14 - Map Operator Tree: - TableScan alias: d Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -204,29 +154,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Map 17 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 2) and (d_year = 2000) and d_month_seq is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_month_seq (type: int) - outputColumnNames: d_month_seq - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: d_month_seq (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Map 19 + Map 10 Map Operator Tree: TableScan alias: s @@ -244,7 +172,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) - Map 20 + Map 11 Map Operator Tree: TableScan alias: c @@ -257,12 +185,50 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 13 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 8 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 14 + Map Operator Tree: + TableScan + alias: j + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_category is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(i_current_price), count(i_current_price) + keys: i_category (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) + Map 20 Map Operator Tree: TableScan alias: i @@ -280,41 +246,47 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: decimal(7,2)) - Reducer 10 + Map 8 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 2) and (d_year = 2000) and d_month_seq is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_month_seq (type: int) + outputColumnNames: d_month_seq + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: d_month_seq (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reducer 12 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col8 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col8 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col13 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col13 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 15 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -326,13 +298,45 @@ STAGE PLANS: expressions: (_col1 / _col2) (type: decimal(37,22)), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(37,22)), _col1 (type: boolean) - Reducer 15 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 19 + Statistics: Num rows: 231000 Data size: 333859228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 231000 Data size: 333859228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(37,22)), _col1 (type: boolean) + Reducer 16 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col5 > (1.2 * CASE WHEN (_col1 is null) THEN (null) ELSE (_col0) END)) (type: boolean) + Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -347,7 +351,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 16 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -357,73 +361,73 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col4, _col5 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: int), _col5 (type: int) - outputColumnNames: _col1, _col2 + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) - Reducer 18 + value expressions: _col4 (type: int) + Reducer 4 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col9 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col4 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col9 (type: string) + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col9 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col1 (type: string) + keys: _col9 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 >= 10L) (type: boolean) - Statistics: Num rows: 140552546 Data size: 12399586418 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 140552546 Data size: 12399586418 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Reducer 4 + Reducer 7 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 140552546 Data size: 12399586418 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 127775039 Data size: 11272351038 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -436,22 +440,16 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 9 Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > (1.2 * CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END)) (type: boolean) - Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query61.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query61.q.out b/ql/src/test/results/clientpositive/perf/spark/query61.q.out index c01612a..5062307 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query61.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query61.q.out @@ -235,11 +235,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col6 (type: int) - outputColumnNames: _col12 + 1 _col2 (type: int) + outputColumnNames: _col8 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col12) + aggregations: sum(_col8) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE @@ -298,16 +298,12 @@ STAGE PLANS: input vertices: 1 Map 24 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col4 (type: decimal(7,2)) - outputColumnNames: _col6, _col8 + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: int) - sort order: + - Map-reduce partition columns: _col6 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col8 (type: decimal(7,2)) + value expressions: _col4 (type: decimal(7,2)) Stage: Stage-4 Spark @@ -470,11 +466,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col10 (type: int) - outputColumnNames: _col17 + 1 _col2 (type: int) + outputColumnNames: _col9 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col17) + aggregations: sum(_col9) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE @@ -575,16 +571,12 @@ STAGE PLANS: input vertices: 1 Map 13 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col5 (type: decimal(7,2)) - outputColumnNames: _col10, _col13 + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col10 (type: int) - sort order: + - Map-reduce partition columns: _col10 (type: int) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col13 (type: decimal(7,2)) + value expressions: _col5 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query63.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query63.q.out b/ql/src/test/results/clientpositive/perf/spark/query63.q.out index 1f62e87..3c934de 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query63.q.out @@ -64,7 +64,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 8 Map Operator Tree: TableScan alias: store @@ -86,11 +86,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 7 (PARTITION-LEVEL SORT, 399) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) - Reducer 4 <- Reducer 3 (GROUP, 529) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -111,7 +110,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Map 7 + Map 6 Map Operator Tree: TableScan alias: item @@ -129,7 +128,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: int) - Map 8 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -183,18 +182,18 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col3, _col8, _col11 input vertices: - 1 Map 9 + 1 Map 8 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col11 (type: int), _col8 (type: int) + keys: _col8 (type: int), _col11 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) Reducer 4 @@ -206,58 +205,46 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Reducer 5 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: decimal(17,2)) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: int, _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col1 - raw input shape: - window functions: - window function definition - alias: avg_window_0 - arguments: _col2 - name: avg - window function: GenericUDAFAverageEvaluatorDecimal - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col2 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: avg_window_0, _col1, _col2 + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean) - Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: avg_window_0, _col0, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)) - sort order: +++ + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 6 + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)) + sort order: +++ + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: decimal(17,2)), KEY.reducesinkkey1 (type: decimal(21,6))