http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query45.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query45.q.out b/ql/src/test/results/clientpositive/perf/spark/query45.q.out index 7e1cc88..b674400 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query45.q.out @@ -46,10 +46,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 16 <- Map 15 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 15 Map Operator Tree: TableScan alias: item @@ -70,7 +70,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 6 + Reducer 16 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -88,45 +88,33 @@ STAGE PLANS: Spark Edges: Reducer 11 <- Map 10 (GROUP, 3) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) - Reducer 14 <- Map 16 (PARTITION-LEVEL SORT, 706), Reducer 13 (PARTITION-LEVEL SORT, 706) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 527), Reducer 9 (PARTITION-LEVEL SORT, 527) - Reducer 3 <- Reducer 2 (GROUP, 224) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 14 (PARTITION-LEVEL SORT, 154) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 6 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 777), Reducer 9 (PARTITION-LEVEL SORT, 777) + Reducer 4 <- Reducer 3 (GROUP, 230) + Reducer 5 <- Reducer 4 (SORT, 1) Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 7), Reducer 11 (PARTITION-LEVEL SORT, 7) - Reducer 9 <- Reducer 14 (PARTITION-LEVEL SORT, 191), Reducer 8 (PARTITION-LEVEL SORT, 191) + Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 174), Reducer 8 (PARTITION-LEVEL SORT, 174) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_county (type: string), ca_zip (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Reducer 6 - Statistics: Num rows: 40000000 Data size: 41275195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 41275195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint), _col4 (type: bigint) - Local Work: - Map Reduce Local Work + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 10 Map Operator Tree: TableScan @@ -167,7 +155,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Map 15 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -184,24 +172,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Map 16 + Map 6 Map Operator Tree: TableScan - alias: customer - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_county (type: string), ca_zip (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Map 7 Map Operator Tree: TableScan @@ -248,91 +236,99 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 14 + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col8 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col8 (type: int), _col1 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col1, _col3, _col5 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: decimal(7,2)) - Reducer 2 + outputColumnNames: _col0, _col3, _col4 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string), _col4 (type: string) + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col5 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col8, _col14 - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col14 (type: decimal(7,2)), _col1 (type: string), _col2 (type: string), _col6 (type: string), _col3 (type: bigint), _col4 (type: bigint), _col8 (type: boolean) - outputColumnNames: _col3, _col7, _col8, _col13, _col14, _col15, _col17 - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or CASE WHEN ((_col14 = 0L)) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col13 is null) THEN (null) WHEN ((_col15 < _col14)) THEN (null) ELSE (false) END) (type: boolean) - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col3, _col7, _col8 - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col8 (type: string), _col7 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: decimal(17,2)) - Reducer 3 + 1 _col6 (type: int) + outputColumnNames: _col3, _col4, _col6, _col8, _col12 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col3, _col4, _col6, _col8, _col12, _col16, _col17 + input vertices: + 1 Reducer 16 + Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col16 (type: bigint), _col17 (type: bigint), _col8 (type: boolean) + outputColumnNames: _col3, _col7, _col8, _col13, _col14, _col15, _col17 + Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') or CASE WHEN ((_col14 = 0L)) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col13 is null) THEN (null) WHEN ((_col15 < _col14)) THEN (null) ELSE (false) END) (type: boolean) + Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col3, _col7, _col8 + Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col8 (type: string), _col7 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 191667562 Data size: 29319594068 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(17,2)) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 105417161 Data size: 14333685343 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95833781 Data size: 14659797034 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Statistics: Num rows: 105417161 Data size: 14333685343 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95833781 Data size: 14659797034 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 105417161 Data size: 14333685343 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 95833781 Data size: 14659797034 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 15200 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 15200 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -360,15 +356,15 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col3 (type: int) - outputColumnNames: _col1, _col3, _col5, _col9 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + 1 _col1 (type: int) + outputColumnNames: _col1, _col3, _col6, _col7 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col5 (type: int) + key expressions: _col6 (type: int) sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: boolean), _col9 (type: decimal(7,2)) + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: boolean), _col7 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query46.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query46.q.out b/ql/src/test/results/clientpositive/perf/spark/query46.q.out index 6705f50..8b0525d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query46.q.out @@ -76,7 +76,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 11 Map Operator Tree: TableScan alias: store @@ -94,7 +94,7 @@ STAGE PLANS: 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 11 + Map 12 Map Operator Tree: TableScan alias: household_demographics @@ -116,12 +116,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 829), Reducer 8 (PARTITION-LEVEL SORT, 829) - Reducer 3 <- Map 13 (PARTITION-LEVEL SORT, 637), Reducer 2 (PARTITION-LEVEL SORT, 637) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 5 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 882), Reducer 9 (PARTITION-LEVEL SORT, 882) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 7 <- Map 12 (PARTITION-LEVEL SORT, 846), Reducer 6 (PARTITION-LEVEL SORT, 846) - Reducer 8 <- Reducer 7 (GROUP, 582) + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) + Reducer 8 <- Map 13 (PARTITION-LEVEL SORT, 846), Reducer 7 (PARTITION-LEVEL SORT, 846) + Reducer 9 <- Reducer 8 (GROUP, 582) #### A masked pattern was here #### Vertices: Map 1 @@ -137,12 +137,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string) + Map 10 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) - Map 12 + Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE + Map 13 Map Operator Tree: TableScan alias: customer_address @@ -160,7 +177,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 13 + Map 5 Map Operator Tree: TableScan alias: current_addr @@ -178,7 +195,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: store_sales @@ -196,60 +213,43 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Map 9 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18263 Data size: 20436297 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: string), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col5 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col10 - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9, _col10 + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col10 <> _col6) (type: boolean) - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + predicate: (_col5 <> _col8) (type: boolean) + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col10 (type: string), _col6 (type: string), _col4 (type: int), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) + expressions: _col3 (type: string), _col2 (type: string), _col5 (type: string), _col8 (type: string), _col6 (type: int), _col9 (type: decimal(17,2)), _col10 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int) sort order: +++++ - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 4 @@ -257,7 +257,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -268,7 +268,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -288,7 +288,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7 input vertices: - 1 Map 10 + 1 Map 11 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -298,7 +298,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col5, _col6, _col7 input vertices: - 1 Map 11 + 1 Map 12 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: int) @@ -306,7 +306,7 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Reducer 7 + Reducer 8 Reduce Operator Tree: Join Operator condition map: @@ -328,7 +328,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) - Reducer 8 + Reducer 9 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query47.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out index bc97fdf..5175f80 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -397,47 +397,47 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 14 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int) sort order: +++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int), _col6 (type: decimal(17,2)) + value expressions: _col1 (type: int), _col6 (type: decimal(17,2)) Reducer 15 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: decimal(17,2)) + expressions: KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST - partition by: _col3, _col2, _col0, _col1, _col4 + order by: _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col0 ASC NULLS FIRST + partition by: _col3, _col2, _col4, _col5, _col0 raw input shape: window functions: window function definition @@ -448,54 +448,54 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) sort order: ++++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2)) Reducer 16 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col1 (type: decimal(17,2)) + expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: decimal(21,6), _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: int, _col7: decimal(17,2) + output shape: _col0: decimal(21,6), _col1: int, _col2: int, _col3: string, _col4: string, _col5: string, _col6: string, _col7: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col5 ASC NULLS FIRST, _col6 ASC NULLS FIRST - partition by: _col4, _col3, _col1, _col2 + order by: _col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST + partition by: _col4, _col3, _col5, _col6 raw input shape: window functions: window function definition alias: rank_window_1 - arguments: _col5, _col6 + arguments: _col1, _col2 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 > 0) and (_col5 = 2000) and rank_window_1 is not null) (type: boolean) + predicate: ((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null) (type: boolean) Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: decimal(17,2)) + expressions: rank_window_1 (type: int), _col0 (type: decimal(21,6)), _col1 (type: int), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: decimal(17,2)) outputColumnNames: rank_window_1, _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 63887519 Data size: 5636175475 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: string), _col3 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) + expressions: _col4 (type: string), _col3 (type: string), _col5 (type: string), _col6 (type: string), _col1 (type: int), _col2 (type: int), _col7 (type: decimal(17,2)), _col0 (type: decimal(21,6)), rank_window_1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -560,52 +560,52 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 23 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) sort order: ++++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 24 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col3, _col2, _col0, _col1 + order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col3, _col2, _col4, _col5 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col4, _col5 + arguments: _col0, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -615,7 +615,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -648,52 +648,52 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + keys: _col5 (type: int), _col6 (type: int), _col8 (type: string), _col9 (type: string), _col11 (type: string), _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int) + key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int) sort order: ++++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 5 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2)) + expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + output shape: _col0: int, _col1: int, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col3, _col2, _col0, _col1 + order by: _col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST + partition by: _col3, _col2, _col4, _col5 raw input shape: window functions: window function definition alias: rank_window_0 - arguments: _col4, _col5 + arguments: _col0, _col1 name: rank window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) @@ -703,7 +703,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query48.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query48.q.out b/ql/src/test/results/clientpositive/perf/spark/query48.q.out index 505b178..a2b3663 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query48.q.out @@ -161,8 +161,8 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 133), Reducer 3 (PARTITION-LEVEL SORT, 133) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 12), Reducer 4 (PARTITION-LEVEL SORT, 12) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 55), Reducer 3 (PARTITION-LEVEL SORT, 55) + Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 138), Reducer 4 (PARTITION-LEVEL SORT, 138) Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -216,38 +216,38 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: customer_demographics + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + predicate: ((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Map 9 Map Operator Tree: TableScan - alias: customer_demographics - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cd_demo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -259,49 +259,53 @@ STAGE PLANS: outputColumnNames: _col2, _col3, _col5, _col7 Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col3 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)) + value expressions: _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)) Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col7, _col11 + outputColumnNames: _col3, _col5, _col7 Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col11) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col11) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col11) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) (type: boolean) - Statistics: Num rows: 14197224 Data size: 1252483241 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 14197224 Data size: 1252483241 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int) + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: int), _col7 (type: decimal(7,2)) Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5 - Statistics: Num rows: 15616946 Data size: 1377731594 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + outputColumnNames: _col5, _col7, _col14 + Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) (type: boolean) + Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int) + outputColumnNames: _col5 + Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 6 Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/244ca8e5/ql/src/test/results/clientpositive/perf/spark/query5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query5.q.out b/ql/src/test/results/clientpositive/perf/spark/query5.q.out index a0b70a7..90b45c1 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query5.q.out @@ -311,9 +311,9 @@ STAGE PLANS: Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 329), Map 14 (PARTITION-LEVEL SORT, 329), Map 9 (PARTITION-LEVEL SORT, 329) Reducer 11 <- Map 15 (PARTITION-LEVEL SORT, 362), Reducer 10 (PARTITION-LEVEL SORT, 362) Reducer 12 <- Reducer 11 (GROUP, 398) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 322), Map 22 (PARTITION-LEVEL SORT, 322), Reducer 20 (PARTITION-LEVEL SORT, 322) + Reducer 17 <- Map 14 (PARTITION-LEVEL SORT, 322), Map 16 (PARTITION-LEVEL SORT, 322), Reducer 20 (PARTITION-LEVEL SORT, 322) Reducer 18 <- Reducer 17 (GROUP, 389) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 14 (PARTITION-LEVEL SORT, 432), Map 6 (PARTITION-LEVEL SORT, 432) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 6 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 164), Map 21 (PARTITION-LEVEL SORT, 164) Reducer 3 <- Reducer 2 (GROUP, 523) Reducer 4 <- Reducer 12 (GROUP, 1009), Reducer 18 (GROUP, 1009), Reducer 3 (GROUP, 1009) @@ -445,23 +445,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)) - Map 22 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00.0' AND TIMESTAMP'1998-08-18 00:00:00.0' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan @@ -480,6 +463,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633586785 Data size: 55276696920 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Map 7 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00.0' AND TIMESTAMP'1998-08-18 00:00:00.0' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Map 9 Map Operator Tree: TableScan