Repository: hive Updated Branches: refs/heads/branch-3 91d0a0a2f -> 50f144d1f
http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query92.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query92.q.out b/ql/src/test/results/clientpositive/perf/spark/query92.q.out index 70c4c5a..e7b8632 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query92.q.out @@ -67,7 +67,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 5 Map Operator Tree: TableScan alias: date_dim @@ -90,7 +90,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 9 Map Operator Tree: TableScan alias: date_dim @@ -112,11 +112,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 156), Map 6 (PARTITION-LEVEL SORT, 156) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 270), Reducer 9 (PARTITION-LEVEL SORT, 270) - Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 9 <- Map 8 (GROUP, 169) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 261), Reducer 8 (PARTITION-LEVEL SORT, 261) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 7 <- Map 6 (GROUP, 169) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 87), Reducer 7 (PARTITION-LEVEL SORT, 87) #### A masked pattern was here #### Vertices: Map 1 @@ -131,13 +131,25 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) - Map 6 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Local Work: + Map Reduce Local Work + Map 10 Map Operator Tree: TableScan alias: item @@ -154,7 +166,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 6 Map Operator Tree: TableScan alias: web_sales @@ -174,7 +186,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: - 1 Map 10 + 1 Map 9 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2), count(_col2) @@ -191,50 +203,22 @@ STAGE PLANS: Local Work: Map Reduce Local Work Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2, _col7 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: int) + outputColumnNames: _col2, _col5 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col2 > _col7) (type: boolean) - Statistics: Num rows: 63889187 Data size: 8687081792 Basic stats: COMPLETE Column stats: NONE + predicate: (_col2 > _col5) (type: boolean) + Statistics: Num rows: 58081078 Data size: 7897346909 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: decimal(7,2)) outputColumnNames: _col2 - Statistics: Num rows: 63889187 Data size: 8687081792 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 58081078 Data size: 7897346909 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col2) mode: hash @@ -244,7 +228,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -260,7 +244,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 5 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(17,2)) @@ -276,7 +260,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) @@ -294,6 +278,22 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,21)) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(38,21)) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query94.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query94.q.out b/ql/src/test/results/clientpositive/perf/spark/query94.q.out index 8b853b9..2af8280 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query94.q.out @@ -56,7 +56,8 @@ limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -82,7 +83,12 @@ STAGE PLANS: 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 9 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -105,7 +111,7 @@ STAGE PLANS: Spark Edges: Reducer 13 <- Map 12 (GROUP, 11) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 312), Map 8 (PARTITION-LEVEL SORT, 312) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 9 (PARTITION-LEVEL SORT, 327) Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 357), Reducer 2 (PARTITION-LEVEL SORT, 357) Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 230), Reducer 3 (PARTITION-LEVEL SORT, 230) Reducer 5 <- Reducer 4 (GROUP, 124) @@ -125,12 +131,24 @@ STAGE PLANS: expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_warehouse_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 8 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Local Work: + Map Reduce Local Work Map 11 Map Operator Tree: TableScan @@ -172,7 +190,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer_address @@ -216,34 +234,24 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col3, _col4, _col5, _col6 input vertices: - 1 Map 9 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6 - input vertices: - 1 Map 10 + 1 Map 10 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query95.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query95.q.out b/ql/src/test/results/clientpositive/perf/spark/query95.q.out index 2c35b83..72bb8af 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query95.q.out @@ -62,7 +62,8 @@ limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -70,39 +71,44 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: web_site + Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00.0' AND TIMESTAMP'1999-06-30 00:00:00.0' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: web_site_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 9 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 7 Map Operator Tree: TableScan - alias: web_site - Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((web_company_name = 'pri') and web_site_sk is not null) (type: boolean) - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00.0' AND TIMESTAMP'1999-06-30 00:00:00.0' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: web_site_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -115,7 +121,7 @@ STAGE PLANS: Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 306), Map 18 (PARTITION-LEVEL SORT, 306) Reducer 16 <- Map 19 (PARTITION-LEVEL SORT, 179), Reducer 15 (PARTITION-LEVEL SORT, 179) Reducer 17 <- Reducer 16 (GROUP, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 312), Map 7 (PARTITION-LEVEL SORT, 312) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 8 (PARTITION-LEVEL SORT, 327) Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 381), Reducer 17 (PARTITION-LEVEL SORT, 381), Reducer 2 (PARTITION-LEVEL SORT, 381) Reducer 4 <- Reducer 3 (GROUP, 448) Reducer 5 <- Reducer 4 (GROUP, 1) @@ -134,12 +140,24 @@ STAGE PLANS: expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 7 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Local Work: + Map Reduce Local Work Map 10 Map Operator Tree: TableScan @@ -229,7 +247,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 8 Map Operator Tree: TableScan alias: customer_address @@ -349,34 +367,24 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col4, _col5 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5 + outputColumnNames: _col3, _col4, _col5 input vertices: - 1 Map 8 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5 - input vertices: - 1 Map 9 + 1 Map 9 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query97.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query97.q.out b/ql/src/test/results/clientpositive/perf/spark/query97.q.out index 278fcf1..14a2aed 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query97.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query97.q.out @@ -102,24 +102,24 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 336) + Reducer 2 <- Map 1 (GROUP, 437) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 386), Reducer 7 (PARTITION-LEVEL SORT, 386) Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 437) + Reducer 7 <- Map 6 (GROUP, 336) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_sales - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: cs_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -129,31 +129,31 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 5 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col1 (type: int), _col2 (type: int) + keys: _col2 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Map 6 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ss_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: cs_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -163,17 +163,17 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Map 8 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col2 (type: int), _col1 (type: int) + keys: _col1 (type: int), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reducer 2 @@ -182,12 +182,12 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -199,7 +199,7 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: CASE WHEN ((_col2 is not null and _col0 is null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col2 is null and _col0 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col2 is not null and _col0 is not null)) THEN (1) ELSE (0) END (type: int) + expressions: CASE WHEN ((_col0 is not null and _col2 is null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is null and _col2 is not null)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((_col0 is not null and _col2 is not null)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -235,12 +235,12 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/12a1784d/ql/src/test/results/clientpositive/perf/spark/query99.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query99.q.out b/ql/src/test/results/clientpositive/perf/spark/query99.q.out index 436eb3c..f59db0a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query99.q.out @@ -97,18 +97,18 @@ STAGE PLANS: Map 5 Map Operator Tree: TableScan - alias: ship_mode - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: call_center + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: sm_ship_mode_sk is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: cc_call_center_sk is not null (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sm_ship_mode_sk (type: int), sm_type (type: string) + expressions: cc_call_center_sk (type: int), cc_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -133,18 +133,18 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: call_center - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + alias: ship_mode + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: cc_call_center_sk is not null (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + predicate: sm_ship_mode_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: cc_call_center_sk (type: int), cc_name (type: string) + expressions: sm_ship_mode_sk (type: int), sm_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -182,34 +182,34 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col8 + outputColumnNames: _col0, _col1, _col3, _col4, _col8 input vertices: 1 Map 5 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col4 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col8, _col10 + outputColumnNames: _col0, _col1, _col3, _col8, _col10 input vertices: 1 Map 6 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col8, _col10, _col12 input vertices: 1 Map 7 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: substr(_col10, 1, 20) (type: string), _col8 (type: string), _col12 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) + expressions: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE Group By Operator