http://git-wip-us.apache.org/repos/asf/hive/blob/328d3f93/ql/src/test/results/clientpositive/perf/spark/query81.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query81.q.out b/ql/src/test/results/clientpositive/perf/spark/query81.q.out index 5b2d5b3..23998d6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query81.q.out @@ -66,72 +66,37 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 262), Reducer 9 (PARTITION-LEVEL SORT, 262) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 25), Map 17 (PARTITION-LEVEL SORT, 25) - Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 344), Reducer 14 (PARTITION-LEVEL SORT, 344) - Reducer 16 <- Reducer 15 (GROUP PARTITION-LEVEL SORT, 349) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 5 (PARTITION-LEVEL SORT, 697) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 656), Reducer 2 (PARTITION-LEVEL SORT, 656) - Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 7 <- Map 11 (PARTITION-LEVEL SORT, 25), Map 6 (PARTITION-LEVEL SORT, 25) - Reducer 8 <- Map 12 (PARTITION-LEVEL SORT, 344), Reducer 7 (PARTITION-LEVEL SORT, 344) - Reducer 9 <- Reducer 8 (GROUP, 349) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 25), Map 14 (PARTITION-LEVEL SORT, 25) + Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 344), Reducer 11 (PARTITION-LEVEL SORT, 344) + Reducer 13 <- Reducer 12 (GROUP PARTITION-LEVEL SORT, 349) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 697), Map 18 (PARTITION-LEVEL SORT, 697) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 25), Map 8 (PARTITION-LEVEL SORT, 25) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 344), Reducer 2 (PARTITION-LEVEL SORT, 344) + Reducer 4 <- Reducer 3 (GROUP, 349) + Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 262), Reducer 4 (PARTITION-LEVEL SORT, 262) + Reducer 6 <- Reducer 17 (PARTITION-LEVEL SORT, 656), Reducer 5 (PARTITION-LEVEL SORT, 656) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: customer - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string), c_current_addr_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - Map 11 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Map 12 - Map Operator Tree: - TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_returning_addr_sk (type: int), cr_return_amt_inc_tax (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 13 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Map 10 Map Operator Tree: TableScan alias: catalog_returns @@ -149,7 +114,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Map 17 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -166,7 +131,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Map 18 + Map 15 Map Operator Tree: TableScan alias: customer_address @@ -184,7 +149,25 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 5 + Map 16 + Map Operator Tree: + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_customer_id (type: string), c_current_addr_sk (type: int), c_salutation (type: string), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Map 18 Map Operator Tree: TableScan alias: customer_address @@ -202,48 +185,42 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: decimal(5,2)), _col11 (type: string) - Map 6 + Map 8 Map Operator Tree: TableScan - alias: catalog_returns - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_returning_addr_sk (type: int), cr_return_amt_inc_tax (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2 > CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean) - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Reducer 14 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -259,7 +236,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 15 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -281,7 +258,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 16 + Reducer 13 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -309,7 +286,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,11)), _col1 (type: boolean) - Reducer 2 + Reducer 17 Reduce Operator Tree: Join Operator condition map: @@ -325,46 +302,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: decimal(5,2)), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col8, _col9, _col10, _col11, _col12, _col14, _col15, _col16, _col17, _col20 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: decimal(5,2)), _col17 (type: string), _col20 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(5,2)), _col13 (type: string), _col14 (type: decimal(17,2)) - sort order: +++++++++++++++ - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), KEY.reducesinkkey10 (type: string), KEY.reducesinkkey11 (type: string), KEY.reducesinkkey12 (type: decimal(5,2)), KEY.reducesinkkey13 (type: string), KEY.reducesinkkey14 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), 'IL' (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(5,2)), _col13 (type: string), _col14 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -380,7 +318,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 8 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -402,7 +340,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 9 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -420,6 +358,64 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 > CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col7, _col9, _col10, _col11, _col13, _col14, _col15, _col16, _col17, _col18, _col20, _col21, _col22, _col23 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: decimal(5,2)), _col23 (type: string), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(5,2)), _col13 (type: string), _col14 (type: decimal(17,2)) + sort order: +++++++++++++++ + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: string), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: string), KEY.reducesinkkey10 (type: string), KEY.reducesinkkey11 (type: string), KEY.reducesinkkey12 (type: decimal(5,2)), KEY.reducesinkkey13 (type: string), KEY.reducesinkkey14 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), 'IL' (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: decimal(5,2)), _col13 (type: string), _col14 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/328d3f93/ql/src/test/results/clientpositive/perf/spark/query82.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query82.q.out b/ql/src/test/results/clientpositive/perf/spark/query82.q.out index fb30019..f734e87 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query82.q.out @@ -40,7 +40,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -62,7 +62,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 5 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 5 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) Reducer 3 <- Reducer 2 (GROUP, 874) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -102,7 +102,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25666 Data size: 36863512 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) - Map 6 + Map 7 Map Operator Tree: TableScan alias: inventory @@ -120,14 +120,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1 + outputColumnNames: _col3 input vertices: - 1 Map 7 + 0 Map 6 Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col3 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 4593600 Data size: 72578135 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work @@ -140,7 +140,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col1 (type: int) + 2 _col3 (type: int) outputColumnNames: _col2, _col3, _col4 Statistics: Num rows: 1267190424 Data size: 111791907016 Basic stats: COMPLETE Column stats: NONE Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/328d3f93/ql/src/test/results/clientpositive/perf/spark/query83.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query83.q.out b/ql/src/test/results/clientpositive/perf/spark/query83.q.out index 614ae49..d2d91e4 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query83.q.out @@ -138,65 +138,47 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 13 (PARTITION-LEVEL SORT, 2) - Reducer 12 <- Reducer 11 (GROUP, 2) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 41), Map 18 (PARTITION-LEVEL SORT, 41) - Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 40), Reducer 20 (PARTITION-LEVEL SORT, 40) - Reducer 17 <- Reducer 16 (GROUP, 43) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 30), Map 7 (PARTITION-LEVEL SORT, 30) - Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Map 24 (PARTITION-LEVEL SORT, 2) - Reducer 23 <- Reducer 22 (GROUP, 2) - Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 16), Map 29 (PARTITION-LEVEL SORT, 16) - Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 13), Reducer 31 (PARTITION-LEVEL SORT, 13) - Reducer 28 <- Reducer 27 (GROUP, 13) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 27), Reducer 9 (PARTITION-LEVEL SORT, 27) - Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 2), Reducer 34 (PARTITION-LEVEL SORT, 2) - Reducer 33 <- Map 32 (PARTITION-LEVEL SORT, 2), Map 35 (PARTITION-LEVEL SORT, 2) - Reducer 34 <- Reducer 33 (GROUP, 2) - Reducer 4 <- Reducer 3 (GROUP, 29) - Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 42), Reducer 28 (PARTITION-LEVEL SORT, 42), Reducer 4 (PARTITION-LEVEL SORT, 42) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Reducer 9 (GROUP, 2) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 2), Reducer 21 (PARTITION-LEVEL SORT, 2) + Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 36), Reducer 15 (PARTITION-LEVEL SORT, 36) + Reducer 17 <- Map 24 (PARTITION-LEVEL SORT, 44), Reducer 16 (PARTITION-LEVEL SORT, 44) + Reducer 18 <- Reducer 17 (GROUP, 43) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) + Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 2), Map 22 (PARTITION-LEVEL SORT, 2) + Reducer 21 <- Reducer 20 (GROUP, 2) + Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 2), Reducer 32 (PARTITION-LEVEL SORT, 2) + Reducer 27 <- Map 34 (PARTITION-LEVEL SORT, 12), Reducer 26 (PARTITION-LEVEL SORT, 12) + Reducer 28 <- Map 35 (PARTITION-LEVEL SORT, 17), Reducer 27 (PARTITION-LEVEL SORT, 17) + Reducer 29 <- Reducer 28 (GROUP, 13) + Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 25), Reducer 2 (PARTITION-LEVEL SORT, 25) + Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 2), Map 33 (PARTITION-LEVEL SORT, 2) + Reducer 32 <- Reducer 31 (GROUP, 2) + Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 32), Reducer 3 (PARTITION-LEVEL SORT, 32) + Reducer 5 <- Reducer 4 (GROUP, 29) + Reducer 6 <- Reducer 18 (PARTITION-LEVEL SORT, 42), Reducer 29 (PARTITION-LEVEL SORT, 42), Reducer 5 (PARTITION-LEVEL SORT, 42) + Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_returns - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) - Map 10 - Map Operator Tree: - TableScan alias: date_dim Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date is not null and d_week_seq is not null) (type: boolean) + predicate: (d_date is not null and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date (type: string), d_week_seq (type: int) + expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Map 13 + value expressions: _col0 (type: int) + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -218,25 +200,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE - Map 14 + Map 12 Map Operator Tree: TableScan - alias: store_returns - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_return_quantity (type: int) + expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) - Map 18 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Map 13 Map Operator Tree: TableScan alias: item @@ -254,7 +236,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 19 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -272,7 +254,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Map 21 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -290,7 +272,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 24 + Map 22 Map Operator Tree: TableScan alias: date_dim @@ -312,25 +294,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE - Map 25 + Map 23 Map Operator Tree: TableScan - alias: web_returns - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wr_item_sk is not null and wr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_returned_date_sk (type: int), wr_item_sk (type: int), wr_return_quantity (type: int) + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) - Map 29 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Map 24 Map Operator Tree: TableScan alias: item @@ -348,7 +330,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 30 + Map 25 Map Operator Tree: TableScan alias: date_dim @@ -366,7 +348,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Map 32 + Map 30 Map Operator Tree: TableScan alias: date_dim @@ -384,7 +366,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 35 + Map 33 Map Operator Tree: TableScan alias: date_dim @@ -406,7 +388,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 34 + Map Operator Tree: + TableScan + alias: web_returns + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (wr_item_sk is not null and wr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wr_returned_date_sk (type: int), wr_item_sk (type: int), wr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Map 35 Map Operator Tree: TableScan alias: item @@ -430,39 +430,19 @@ STAGE PLANS: alias: date_dim Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date is not null and d_date_sk is not null) (type: boolean) + predicate: (d_date is not null and d_week_seq is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: d_date (type: string), d_week_seq (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 12 + value expressions: _col0 (type: string) + Reducer 10 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -480,16 +460,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reducer 16 Reduce Operator Tree: Join Operator @@ -498,11 +477,27 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col4, _col5 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) + aggregations: sum(_col5) + keys: _col7 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE @@ -512,7 +507,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 17 + Reducer 18 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -532,22 +527,6 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: string) - Reducer 20 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 @@ -557,7 +536,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 22 + Reducer 20 Reduce Operator Tree: Join Operator condition map: @@ -577,7 +556,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 23 + Reducer 21 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -595,16 +574,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col4 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reducer 27 Reduce Operator Tree: Join Operator @@ -613,11 +591,27 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + outputColumnNames: _col4, _col5 + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 28 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) + aggregations: sum(_col5) + keys: _col7 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE @@ -627,7 +621,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 28 + Reducer 29 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -649,36 +643,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 31 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col4, _col5 + Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col4 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 33 + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int) + Reducer 31 Reduce Operator Tree: Join Operator condition map: @@ -698,7 +671,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 34 + Reducer 32 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -712,6 +685,28 @@ STAGE PLANS: Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE Reducer 4 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7 + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5) + keys: _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 5 + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -724,7 +719,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -746,7 +741,7 @@ STAGE PLANS: Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) - Reducer 6 + Reducer 7 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: double), VALUE._col1 (type: bigint), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: decimal(25,6)) @@ -766,17 +761,22 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col1 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator