http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query65.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query65.q.out b/ql/src/test/results/clientpositive/perf/spark/query65.q.out index 575fc5c..3b3baef 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query65.q.out @@ -57,8 +57,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -66,29 +65,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 11 - Map Operator Tree: - TableScan - alias: store - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_store_sk (type: int), s_store_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: Map 6 Map Operator Tree: TableScan @@ -108,7 +84,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-4 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -135,8 +111,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 437) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 328), Reducer 8 (PARTITION-LEVEL SORT, 328) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 86), Reducer 3 (PARTITION-LEVEL SORT, 86) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 328), Reducer 2 (PARTITION-LEVEL SORT, 328), Reducer 8 (PARTITION-LEVEL SORT, 328) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 166), Reducer 3 (PARTITION-LEVEL SORT, 166) Reducer 5 <- Reducer 4 (SORT, 1) Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 437) #### A masked pattern was here #### @@ -180,6 +156,24 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 11 + Map Operator Tree: + TableScan alias: item Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -250,23 +244,23 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col2 <= (0.1 * _col4)) (type: boolean) - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)), _col6 (type: string) Reducer 4 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -274,34 +268,24 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col6, _col7, _col8, _col9 - Statistics: Num rows: 127775039 Data size: 11272351047 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col11 - input vertices: - 1 Map 11 - Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col11 (type: string), _col6 (type: string), _col2 (type: decimal(17,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: string) + outputColumnNames: _col2, _col6, _col8, _col9, _col10, _col11 + Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col8 (type: string), _col2 (type: decimal(17,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: string) Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(7,2)), VALUE._col2 (type: decimal(7,2)), VALUE._col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query66.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query66.q.out b/ql/src/test/results/clientpositive/perf/spark/query66.q.out index 15654f9..17e6ac7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query66.q.out @@ -582,7 +582,7 @@ STAGE PLANS: Edges: Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 336), Map 14 (PARTITION-LEVEL SORT, 336) Reducer 12 <- Reducer 11 (GROUP, 447) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 169), Map 14 (PARTITION-LEVEL SORT, 169) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 169), Map 7 (PARTITION-LEVEL SORT, 169) Reducer 3 <- Reducer 2 (GROUP, 224) Reducer 4 <- Reducer 12 (GROUP, 336), Reducer 3 (GROUP, 336) Reducer 5 <- Reducer 4 (SORT, 1) @@ -666,6 +666,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_moy (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) Reducer 11 Local Work: Map Reduce Local Work http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query67.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query67.q.out b/ql/src/test/results/clientpositive/perf/spark/query67.q.out index a3252b6..26f6775 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query67.q.out @@ -86,8 +86,7 @@ limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -114,36 +113,14 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int), d_qoy (type: int) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 442), Map 7 (PARTITION-LEVEL SORT, 442) - Reducer 3 <- Reducer 2 (GROUP, 1009) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 486), Reducer 2 (PARTITION-LEVEL SORT, 486) + Reducer 4 <- Reducer 3 (GROUP, 1009) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 1009) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -158,27 +135,33 @@ STAGE PLANS: expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9 - input vertices: - 1 Map 6 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Map 7 Map Operator Tree: TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int), d_qoy (type: int) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int) + Map 9 + Map Operator Tree: + TableScan alias: item Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -202,37 +185,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col7, _col8, _col9, _col11, _col12, _col13, _col14 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col11, _col12, _col13, _col14, _col16 + outputColumnNames: _col1, _col3, _col4, _col7, _col8, _col9, _col11 input vertices: 1 Map 8 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col11 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col11, _col13, _col14, _col15, _col16 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col15 (type: string), _col14 (type: string), _col13 (type: string), _col16 (type: string), _col7 (type: int), _col9 (type: int), _col8 (type: int), _col11 (type: string), COALESCE((_col4 * CAST( _col3 AS decimal(10,0))),0) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col13 (type: string), _col12 (type: string), _col11 (type: string), _col14 (type: string), _col7 (type: int), _col9 (type: int), _col8 (type: int), _col16 (type: string), COALESCE((_col4 * CAST( _col3 AS decimal(10,0))),0) (type: decimal(18,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col8) - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), 0L (type: bigint) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Group By Operator + aggregations: sum(_col8) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), 0L (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 6899852151 Data size: 608706960084 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: bigint) + sort order: +++++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: bigint) Statistics: Num rows: 6899852151 Data size: 608706960084 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: bigint) - sort order: +++++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string), _col8 (type: bigint) - Statistics: Num rows: 6899852151 Data size: 608706960084 Basic stats: COMPLETE Column stats: NONE - value expressions: _col9 (type: decimal(28,2)) - Reducer 3 + value expressions: _col9 (type: decimal(28,2)) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -252,7 +251,7 @@ STAGE PLANS: Statistics: Num rows: 3449926075 Data size: 304353479997 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: string) - Reducer 4 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: string), KEY.reducesinkkey1 (type: decimal(28,2)) @@ -291,7 +290,7 @@ STAGE PLANS: sort order: ++++++++++ Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 5 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: string), KEY.reducesinkkey8 (type: decimal(28,2)), KEY.reducesinkkey9 (type: int) http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query68.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query68.q.out b/ql/src/test/results/clientpositive/perf/spark/query68.q.out index 5585a54..585aa40 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query68.q.out @@ -93,6 +93,24 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 11 + Map Operator Tree: + TableScan alias: household_demographics Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -108,7 +126,7 @@ STAGE PLANS: 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 8 + Map 9 Map Operator Tree: TableScan alias: date_dim @@ -126,33 +144,15 @@ STAGE PLANS: 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 9 - Map Operator Tree: - TableScan - alias: store - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 829), Reducer 7 (PARTITION-LEVEL SORT, 829) - Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 637), Reducer 2 (PARTITION-LEVEL SORT, 637) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 5 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 882), Reducer 8 (PARTITION-LEVEL SORT, 882) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 6 <- Map 11 (PARTITION-LEVEL SORT, 846), Map 5 (PARTITION-LEVEL SORT, 846) - Reducer 7 <- Reducer 6 (GROUP, 582) + Reducer 7 <- Map 12 (PARTITION-LEVEL SORT, 846), Map 6 (PARTITION-LEVEL SORT, 846) + Reducer 8 <- Reducer 7 (GROUP, 582) #### A masked pattern was here #### Vertices: Map 1 @@ -168,12 +168,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) - Map 11 + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string) + Map 12 Map Operator Tree: TableScan alias: customer_address @@ -191,7 +191,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 12 + Map 5 Map Operator Tree: TableScan alias: current_addr @@ -209,7 +209,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: store_sales @@ -229,7 +229,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -239,7 +239,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8 input vertices: - 1 Map 9 + 1 Map 10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -249,7 +249,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8 input vertices: - 1 Map 10 + 1 Map 11 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: int) @@ -265,37 +265,37 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: string), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col5 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col11 - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9, _col10, _col11 + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col11 <> _col6) (type: boolean) - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + predicate: (_col5 <> _col8) (type: boolean) + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col11 (type: string), _col6 (type: string), _col4 (type: int), _col7 (type: decimal(17,2)), _col9 (type: decimal(17,2)), _col8 (type: decimal(17,2)) + expressions: _col3 (type: string), _col2 (type: string), _col5 (type: string), _col8 (type: string), _col6 (type: int), _col9 (type: decimal(17,2)), _col11 (type: decimal(17,2)), _col10 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col4 (type: int) sort order: ++ - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) Reducer 4 @@ -303,7 +303,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -314,7 +314,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -336,7 +336,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) - Reducer 7 + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query72.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query72.q.out b/ql/src/test/results/clientpositive/perf/spark/query72.q.out index bb71c10..d204803 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query72.q.out @@ -61,8 +61,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -70,27 +69,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 20 - Map Operator Tree: - TableScan - alias: promotion - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_promo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col13 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 11 + Map 7 Map Operator Tree: TableScan alias: warehouse @@ -109,11 +88,11 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-4 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 15 Map Operator Tree: TableScan alias: household_demographics @@ -131,75 +110,85 @@ STAGE PLANS: 1 _col0 (type: int) Local Work: Map Reduce Local Work + Map 16 + Map Operator Tree: + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 19 (PARTITION-LEVEL SORT, 165), Reducer 9 (PARTITION-LEVEL SORT, 165) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 308), Map 15 (PARTITION-LEVEL SORT, 308) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 90), Reducer 10 (PARTITION-LEVEL SORT, 90) - Reducer 3 <- Reducer 2 (GROUP, 73) - Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 6 <- Map 12 (PARTITION-LEVEL SORT, 6), Map 5 (PARTITION-LEVEL SORT, 6) - Reducer 7 <- Reducer 14 (PARTITION-LEVEL SORT, 375), Reducer 6 (PARTITION-LEVEL SORT, 375) - Reducer 8 <- Map 17 (PARTITION-LEVEL SORT, 136), Reducer 7 (PARTITION-LEVEL SORT, 136) - Reducer 9 <- Map 18 (PARTITION-LEVEL SORT, 154), Reducer 8 (PARTITION-LEVEL SORT, 154) + Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 338), Reducer 9 (PARTITION-LEVEL SORT, 338) + Reducer 11 <- Map 17 (PARTITION-LEVEL SORT, 452), Reducer 10 (PARTITION-LEVEL SORT, 452) + Reducer 12 <- Map 18 (PARTITION-LEVEL SORT, 492), Reducer 11 (PARTITION-LEVEL SORT, 492) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 186), Reducer 12 (PARTITION-LEVEL SORT, 186) + Reducer 3 <- Map 19 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67) + Reducer 4 <- Map 20 (PARTITION-LEVEL SORT, 97), Reducer 3 (PARTITION-LEVEL SORT, 97) + Reducer 5 <- Reducer 4 (GROUP, 80) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_returns - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: inventory + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: cr_item_sk is not null (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_item_sk (type: int), cr_order_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Map 12 + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5 + input vertices: + 1 Map 7 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int), _col5 (type: string) + Local Work: + Map Reduce Local Work + Map 13 Map Operator Tree: TableScan - alias: d2 + alias: d1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2001) and d_date_sk is not null and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_week_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), d_date (type: string), d_week_seq (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 13 - Map Operator Tree: - TableScan - alias: catalog_sales - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) - Map 15 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int) + Map 14 Map Operator Tree: TableScan alias: customer_demographics @@ -219,24 +208,6 @@ STAGE PLANS: Map 17 Map Operator Tree: TableScan - alias: d1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_date (type: string), d_week_seq (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 18 - Map Operator Tree: - TableScan alias: item Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -252,7 +223,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 19 + Map 18 Map Operator Tree: TableScan alias: d3 @@ -270,36 +241,58 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 5 + Map 19 Map Operator Tree: TableScan - alias: inventory - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + alias: d2 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5 - input vertices: - 1 Map 11 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: string) - Local Work: - Map Reduce Local Work + expressions: d_date_sk (type: int), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Map 20 + Map Operator Tree: + TableScan + alias: catalog_returns + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cr_item_sk is not null (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cr_item_sk (type: int), cr_order_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: catalog_sales + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_bill_hdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_order_number (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) Reducer 10 Local Work: Map Reduce Local Work @@ -308,107 +301,160 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col9 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col12, _col13, _col14, _col21, _col22, _col25, _col27 - Statistics: Num rows: 170063873 Data size: 23030065010 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(_col27) > (UDFToDouble(_col21) + 5.0D)) (type: boolean) - Statistics: Num rows: 56687957 Data size: 7676688246 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col9, _col10 + input vertices: + 1 Map 15 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 keys: - 0 _col13 (type: int) + 0 _col5 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col12, _col14, _col22, _col25, _col28 + outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col16 input vertices: - 1 Map 20 - Statistics: Num rows: 62356754 Data size: 8444357253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: int), _col14 (type: int), _col5 (type: string), _col25 (type: string), _col22 (type: int), _col28 (type: int) - outputColumnNames: _col4, _col6, _col13, _col15, _col22, _col28 - Statistics: Num rows: 62356754 Data size: 8444357253 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int), _col6 (type: int) - sort order: ++ - Map-reduce partition columns: _col4 (type: int), _col6 (type: int) - Statistics: Num rows: 62356754 Data size: 8444357253 Basic stats: COMPLETE Column stats: NONE - value expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int), _col28 (type: int) - Reducer 14 - Local Work: - Map Reduce Local Work + 1 Map 16 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col16 (type: int) + Reducer 11 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7 - input vertices: - 1 Map 16 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) + outputColumnNames: _col1, _col4, _col6, _col7, _col9, _col10, _col16, _col18 + Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int), _col16 (type: int), _col18 (type: string) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col6, _col7, _col9, _col10, _col16, _col18, _col20 + Statistics: Num rows: 510191624 Data size: 69090195216 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0D)) (type: boolean) + Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: string), _col4 (type: int), _col6 (type: int), _col7 (type: int), _col10 (type: int), _col16 (type: int) + outputColumnNames: _col3, _col8, _col10, _col11, _col14, _col20 + Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 170063874 Data size: 23030064981 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string), _col10 (type: int), _col11 (type: int), _col14 (type: int), _col20 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: int) - 1 _col4 (type: int), _col6 (type: int) - outputColumnNames: _col15, _col17, _col24, _col30 - Statistics: Num rows: 68592430 Data size: 9288793179 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col8 (type: int) + outputColumnNames: _col0, _col3, _col5, _col9, _col14, _col16, _col17, _col20, _col26 + Statistics: Num rows: 187070265 Data size: 25333072028 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col3 < _col17) (type: boolean) + Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col20 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col20 (type: int) + Statistics: Num rows: 62356755 Data size: 8444357342 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: string), _col9 (type: string), _col14 (type: int), _col16 (type: int), _col26 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col20 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col5, _col9, _col14, _col16, _col20, _col26 + Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: string), _col15 (type: string), _col24 (type: int), CASE WHEN (_col30 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col30 is not null) THEN (1) ELSE (0) END (type: int) + expressions: _col14 (type: int), _col16 (type: int), _col5 (type: string), _col9 (type: string), _col20 (type: int), _col26 (type: int) + outputColumnNames: _col4, _col6, _col13, _col15, _col22, _col28 + Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int), _col6 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: int), _col6 (type: int) + Statistics: Num rows: 68592431 Data size: 9288793277 Basic stats: COMPLETE Column stats: NONE + value expressions: _col13 (type: string), _col15 (type: string), _col22 (type: int), _col28 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col4 (type: int), _col6 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col13, _col15, _col22, _col28 + Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col15 (type: string), _col13 (type: string), _col22 (type: int), CASE WHEN (_col28 is null) THEN (1) ELSE (0) END (type: int), CASE WHEN (_col28 is not null) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 68592430 Data size: 9288793179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col3), count(_col4), count() keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 68592430 Data size: 9288793179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 68592430 Data size: 9288793179 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75451675 Data size: 10217672826 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) - Reducer 3 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 34296215 Data size: 4644396589 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: -+++ - Statistics: Num rows: 34296215 Data size: 4644396589 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 4 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 34296215 Data size: 4644396589 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37725837 Data size: 5108836345 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE @@ -419,73 +465,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col7 - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: string), _col7 (type: int) - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col4 (type: int) - outputColumnNames: _col3, _col5, _col7, _col8, _col9, _col12, _col13, _col14, _col15 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 < _col15) (type: boolean) - Statistics: Num rows: 127771498 Data size: 17302827584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col7 (type: int), _col8 (type: int) - sort order: ++ - Map-reduce partition columns: _col7 (type: int), _col8 (type: int) - Statistics: Num rows: 127771498 Data size: 17302827584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col9 (type: int), _col12 (type: int), _col13 (type: int), _col14 (type: int) - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col7 (type: int), _col8 (type: int) - 1 _col2 (type: int), _col0 (type: int) - outputColumnNames: _col5, _col9, _col12, _col13, _col14, _col21, _col22 - Statistics: Num rows: 140548650 Data size: 19033110754 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col12 (type: int) - sort order: + - Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 140548650 Data size: 19033110754 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col9 (type: int), _col13 (type: int), _col14 (type: int), _col21 (type: string), _col22 (type: int) Reducer 9 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col12 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col9, _col12, _col13, _col14, _col21, _col22, _col25 - Statistics: Num rows: 154603518 Data size: 20936422283 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col9 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col9 (type: int) - Statistics: Num rows: 154603518 Data size: 20936422283 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col12 (type: int), _col13 (type: int), _col14 (type: int), _col21 (type: string), _col22 (type: int), _col25 (type: string) + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col9 (type: string), _col10 (type: int) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query73.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query73.q.out b/ql/src/test/results/clientpositive/perf/spark/query73.q.out index 9847cef..7fec0e1 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query73.q.out @@ -83,36 +83,36 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: store - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work Map 8 Map Operator Tree: TableScan - alias: household_demographics - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + predicate: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -169,9 +169,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col1, _col3, _col4 input vertices: 1 Map 7 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE @@ -179,7 +179,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col4 input vertices: