http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query19.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query19.q.out b/ql/src/test/results/clientpositive/perf/spark/query19.q.out index 453bc56..281445c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query19.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query19.q.out @@ -70,7 +70,7 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col7 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -78,35 +78,17 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 846), Reducer 4 (PARTITION-LEVEL SORT, 846) - Reducer 6 <- Reducer 5 (GROUP, 640) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Reducer 3 (GROUP, 582) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 440), Reducer 8 (PARTITION-LEVEL SORT, 440) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) - Map 10 - Map Operator Tree: - TableScan alias: customer Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -117,30 +99,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 11 - Map Operator Tree: - TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_zip (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 8 + value expressions: _col0 (type: int) + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -157,7 +121,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Map 9 + Map 11 Map Operator Tree: TableScan alias: item @@ -175,55 +139,59 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_zip (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col9, _col10, _col11, _col12 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col9, _col10, _col11, _col12, _col15 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col3 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col15 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col15 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string) - Reducer 5 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -231,62 +199,62 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col15 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col9, _col10, _col11, _col12, _col17 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col3, _col7, _col8, _col13, _col14, _col15, _col16 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col7 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col9, _col10, _col11, _col12, _col17, _col19 + outputColumnNames: _col3, _col8, _col13, _col14, _col15, _col16, _col19 input vertices: 1 Map 12 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (substr(_col17, 1, 5) <> substr(_col19, 1, 5)) (type: boolean) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + predicate: (substr(_col3, 1, 5) <> substr(_col19, 1, 5)) (type: boolean) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: decimal(7,2)), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string) - outputColumnNames: _col4, _col9, _col10, _col11, _col12 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + expressions: _col8 (type: decimal(7,2)), _col13 (type: int), _col14 (type: string), _col15 (type: int), _col16 (type: string) + outputColumnNames: _col8, _col13, _col14, _col15, _col16 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4) - keys: _col10 (type: string), _col9 (type: int), _col11 (type: int), _col12 (type: string) + aggregations: sum(_col8) + keys: _col14 (type: string), _col13 (type: int), _col15 (type: int), _col16 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string) - Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)) - Reducer 6 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col0 (type: string), _col1 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: decimal(17,2)), _col5 (type: string), _col6 (type: int), _col2 (type: int), _col3 (type: string) sort order: -++++ - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 7 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey2 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -297,6 +265,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col9, _col10, _col11, _col12 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: string) Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query21.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query21.q.out b/ql/src/test/results/clientpositive/perf/spark/query21.q.out index fc9055a..5c3d2fb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query21.q.out @@ -58,7 +58,8 @@ select * POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -66,39 +67,44 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 7 Map Operator Tree: TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: warehouse + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-08 23:00:00.0' AND TIMESTAMP'1998-05-08 00:00:00.0' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 7 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan - alias: warehouse - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-08 23:00:00.0' AND TIMESTAMP'1998-05-08 00:00:00.0' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) + expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -106,7 +112,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 5 (PARTITION-LEVEL SORT, 6) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) Reducer 3 <- Reducer 2 (GROUP, 7) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### @@ -123,13 +129,25 @@ STAGE PLANS: expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) - Map 5 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5 + input vertices: + 1 Map 5 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: string) + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan alias: item @@ -157,44 +175,34 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col5, _col7 + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col8 + outputColumnNames: _col3, _col5, _col7, _col10 input vertices: - 1 Map 6 - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col8, _col10 - input vertices: - 1 Map 7 + 1 Map 7 + Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: string), _col7 (type: string), CASE WHEN ((CAST( _col5 AS DATE) < DATE'1998-04-08')) THEN (_col3) ELSE (0) END (type: int), CASE WHEN ((CAST( _col5 AS DATE) >= DATE'1998-04-08')) THEN (_col3) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col10 (type: string), _col5 (type: string), CASE WHEN ((CAST( _col8 AS DATE) < DATE'1998-04-08')) THEN (_col3) ELSE (0) END (type: int), CASE WHEN ((CAST( _col8 AS DATE) >= DATE'1998-04-08')) THEN (_col3) ELSE (0) END (type: int) + Group By Operator + aggregations: sum(_col2), sum(_col3) + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint), _col3 (type: bigint) + value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query24.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index 13ac1e8..1f291c0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -131,10 +131,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 472), Map 20 (PARTITION-LEVEL SORT, 472) - Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009) - Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 846), Reducer 14 (PARTITION-LEVEL SORT, 846) - Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 587), Reducer 15 (PARTITION-LEVEL SORT, 587) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 975), Map 20 (PARTITION-LEVEL SORT, 975) + Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 486), Reducer 13 (PARTITION-LEVEL SORT, 486) + Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 564), Reducer 14 (PARTITION-LEVEL SORT, 564) + Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 899), Reducer 15 (PARTITION-LEVEL SORT, 899) Reducer 17 <- Reducer 16 (GROUP, 640) Reducer 18 <- Reducer 17 (GROUP, 1) #### A masked pattern was here #### @@ -162,33 +162,16 @@ STAGE PLANS: 1 Map 19 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) Local Work: Map Reduce Local Work Map 20 Map Operator Tree: TableScan - alias: store_returns - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Map 21 - Map Operator Tree: - TableScan alias: customer Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -204,9 +187,44 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Map 21 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_size (type: string), i_color (type: string), i_units (type: string), i_manager_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Map 22 Map Operator Tree: TableScan + alias: store_returns + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Map 23 + Map Operator Tree: + TableScan alias: customer_address Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -222,99 +240,81 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 23 - Map Operator Tree: - TableScan - alias: item - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_size (type: string), i_color (type: string), i_units (type: string), i_manager_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Reducer 13 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col4, _col6, _col8, _col9 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col6, _col8, _col9, _col13, _col14, _col15 + outputColumnNames: _col0, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: string), _col15 (type: string) + key expressions: _col0 (type: int), _col3 (type: int) sort order: ++ - Map-reduce partition columns: _col9 (type: string), _col15 (type: string) + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col13 (type: string), _col14 (type: string) + value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: string), _col15 (type: string) - 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col0, _col4, _col6, _col8, _col13, _col14, _col16 + 0 _col0 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col4, _col6, _col8, _col9, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col9 (type: string), _col13 (type: string) + sort order: ++ + Map-reduce partition columns: _col9 (type: string), _col13 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col13 (type: string), _col14 (type: string), _col16 (type: string) + value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int) Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col6, _col8, _col13, _col14, _col16, _col20, _col21, _col22, _col23, _col24 + 0 _col9 (type: string), _col13 (type: string) + 1 _col1 (type: string), upper(_col2) (type: string) + outputColumnNames: _col4, _col6, _col8, _col11, _col12, _col15, _col16, _col17, _col18, _col19, _col22 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4) - keys: _col20 (type: decimal(7,2)), _col21 (type: string), _col22 (type: string), _col23 (type: string), _col24 (type: int), _col16 (type: string), _col13 (type: string), _col14 (type: string), _col6 (type: string), _col8 (type: string) + keys: _col11 (type: string), _col12 (type: string), _col6 (type: string), _col8 (type: string), _col15 (type: decimal(7,2)), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: int), _col22 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: decimal(7,2)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) sort order: ++++++++++ - Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col10 (type: decimal(17,2)) Reducer 17 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: decimal(7,2)), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: string), KEY._col9 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int), KEY._col9 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE @@ -353,7 +353,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 8 Map Operator Tree: TableScan alias: store @@ -375,9 +375,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 440), Map 8 (PARTITION-LEVEL SORT, 440) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 516), Reducer 2 (PARTITION-LEVEL SORT, 516) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 564), Reducer 3 (PARTITION-LEVEL SORT, 564) Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899) Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640) #### A masked pattern was here #### @@ -394,42 +394,29 @@ STAGE PLANS: expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9 - input vertices: - 1 Map 7 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Map 10 Map Operator Tree: TableScan - alias: customer - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Map 11 Map Operator Tree: TableScan @@ -448,7 +435,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), upper(_col2) (type: string) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 8 + Map 7 Map Operator Tree: TableScan alias: item @@ -469,21 +456,24 @@ STAGE PLANS: Map 9 Map Operator Tree: TableScan - alias: store_returns - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (c_birth_country is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -491,64 +481,74 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col8, _col9, _col11, _col12, _col14, _col15 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col9, _col10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15 + input vertices: + 1 Map 8 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: decimal(7,2)), _col12 (type: string), _col14 (type: string), _col15 (type: int) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col4, _col6, _col8, _col9, _col11, _col12, _col14, _col15 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3, _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col0 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col9 (type: string), _col11 (type: decimal(7,2)), _col12 (type: string), _col14 (type: string), _col15 (type: int) + value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col15 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col6, _col8, _col9, _col11, _col12, _col14, _col15, _col19, _col20, _col21 + 0 _col0 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col15, _col17, _col18, _col19 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: string), _col21 (type: string) + key expressions: _col15 (type: string), _col19 (type: string) sort order: ++ - Map-reduce partition columns: _col9 (type: string), _col21 (type: string) + Map-reduce partition columns: _col15 (type: string), _col19 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col8 (type: string), _col11 (type: decimal(7,2)), _col12 (type: string), _col14 (type: string), _col15 (type: int), _col19 (type: string), _col20 (type: string) + value expressions: _col4 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col12 (type: string), _col14 (type: string), _col17 (type: string), _col18 (type: string) Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: string), _col21 (type: string) + 0 _col15 (type: string), _col19 (type: string) 1 _col1 (type: string), upper(_col2) (type: string) - outputColumnNames: _col4, _col6, _col8, _col11, _col12, _col14, _col15, _col19, _col20, _col22 + outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col12, _col14, _col17, _col18, _col22 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4) - keys: _col19 (type: string), _col20 (type: string), _col6 (type: string), _col22 (type: string), _col8 (type: string), _col11 (type: decimal(7,2)), _col12 (type: string), _col14 (type: string), _col15 (type: int) + keys: _col17 (type: string), _col18 (type: string), _col12 (type: string), _col22 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col9 (type: string), _col10 (type: int), _col14 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: int) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string) sort order: +++++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE @@ -559,17 +559,17 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: decimal(7,2)), KEY._col6 (type: string), KEY._col7 (type: string), KEY._col8 (type: int) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2)), KEY._col5 (type: string), KEY._col6 (type: string), KEY._col7 (type: int), KEY._col8 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col9 (type: decimal(17,2)) - outputColumnNames: _col1, _col2, _col3, _col9 + outputColumnNames: _col1, _col2, _col7, _col9 Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col9) - keys: _col1 (type: string), _col2 (type: string), _col3 (type: string) + keys: _col1 (type: string), _col2 (type: string), _col7 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 231911707 Data size: 20459318134 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query25.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out index ddfcb83..3ecb2e5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out @@ -127,7 +127,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 11 + Map 10 Map Operator Tree: TableScan alias: d3 @@ -150,7 +150,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 12 Map Operator Tree: TableScan alias: d2 @@ -172,12 +172,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 374), Map 9 (PARTITION-LEVEL SORT, 374) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) - Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 806), Reducer 9 (PARTITION-LEVEL SORT, 806) + Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486) Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 374), Map 8 (PARTITION-LEVEL SORT, 374) #### A masked pattern was here #### Vertices: Map 1 @@ -198,7 +198,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) - Map 12 + Map 11 Map Operator Tree: TableScan alias: store_returns @@ -218,7 +218,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4 input vertices: - 1 Map 13 + 1 Map 12 Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) @@ -228,6 +228,24 @@ STAGE PLANS: value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) Local Work: Map Reduce Local Work + Map 13 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Map 7 Map Operator Tree: TableScan @@ -248,24 +266,6 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: item - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Map 9 - Map Operator Tree: - TableScan alias: catalog_sales Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -283,7 +283,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 11 + 1 Map 10 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int), _col1 (type: int) @@ -293,22 +293,6 @@ STAGE PLANS: value expressions: _col3 (type: decimal(7,2)) Local Work: Map Reduce Local Work - Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int), _col1 (type: int) - 1 _col1 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col8, _col9, _col10, _col11 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int) - sort order: +++ - Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -320,27 +304,27 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col10, _col11 + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col8 (type: int), _col9 (type: int), _col10 (type: int) + outputColumnNames: _col1, _col3, _col5, _col12, _col20 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col10 (type: string), _col11 (type: string) + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col12 (type: decimal(7,2)), _col20 (type: decimal(7,2)) Reducer 4 Local Work: Map Reduce Local Work @@ -349,9 +333,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col8 (type: int), _col9 (type: int), _col10 (type: int) - outputColumnNames: _col3, _col5, _col10, _col11, _col15, _col23 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col5, _col12, _col20, _col25, _col26 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -359,13 +343,13 @@ STAGE PLANS: keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col10, _col11, _col15, _col23, _col28, _col29 + outputColumnNames: _col5, _col12, _col20, _col25, _col26, _col28, _col29 input vertices: 1 Map 14 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5), sum(_col23), sum(_col15) - keys: _col10 (type: string), _col11 (type: string), _col28 (type: string), _col29 (type: string) + aggregations: sum(_col5), sum(_col20), sum(_col12) + keys: _col25 (type: string), _col26 (type: string), _col28 (type: string), _col29 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE @@ -406,6 +390,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col8, _col9, _col10, _col11 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int) + sort order: +++ + Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator
