http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query61.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query61.q.out b/ql/src/test/results/clientpositive/perf/spark/query61.q.out new file mode 100644 index 0000000..5062307 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/spark/query61.q.out @@ -0,0 +1,586 @@ +Warning: Map Join MAPJOIN[105][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 24 + Map Operator Tree: + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Spark + Edges: + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 697), Map 18 (PARTITION-LEVEL SORT, 697) + Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 21 (PARTITION-LEVEL SORT, 1009) + Reducer 17 <- Reducer 16 (GROUP, 1) + Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 398), Map 22 (PARTITION-LEVEL SORT, 398) + Reducer 21 <- Map 23 (PARTITION-LEVEL SORT, 440), Reducer 20 (PARTITION-LEVEL SORT, 440) +#### A masked pattern was here #### + Vertices: + Map 14 + Map Operator Tree: + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 18 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Map 19 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Map 22 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Map 23 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_category = 'Electronics') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reducer 15 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reducer 16 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col8 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col8) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)) + Reducer 17 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + Reducer 20 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Reducer 21 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4 + input vertices: + 1 Map 24 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 13 + Map Operator Tree: + TableScan + alias: promotion + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y')) and p_promo_sk is not null) (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 6 (PARTITION-LEVEL SORT, 697) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 440), Reducer 8 (PARTITION-LEVEL SORT, 440) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 10 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Map 11 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_category = 'Electronics') and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: customer_address + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col9 + Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col9) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 17 + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(17,2)), _col1 (type: decimal(17,2)), ((CAST( _col0 AS decimal(15,4)) / CAST( _col1 AS decimal(15,4))) * 100) (type: decimal(38,19)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(17,2)), _col1 (type: decimal(17,2)) + sort order: ++ + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(38,19)) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(17,2)), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,19)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) + Reducer 9 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col5 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5 + input vertices: + 1 Map 12 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col5 + input vertices: + 1 Map 13 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)) + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink +
http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query63.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query63.q.out b/ql/src/test/results/clientpositive/perf/spark/query63.q.out new file mode 100644 index 0000000..3c934de --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/spark/query63.q.out @@ -0,0 +1,269 @@ +PREHOOK: query: explain +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1212,1212+1,1212+2,1212+3,1212+4,1212+5,1212+6,1212+7,1212+8,1212+9,1212+10,1212+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','refernece','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales +limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 8 + Map Operator Tree: + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529) + Reducer 5 <- Reducer 4 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Map 6 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'refernece', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_manager_id (type: int) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_moy (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36525 Data size: 40871475 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col8 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col8 (type: int) + Reducer 3 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col8, _col11 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col8, _col11 + input vertices: + 1 Map 8 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col8 (type: int), _col11 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: avg_window_0 + arguments: _col2 + name: avg + window function: GenericUDAFAverageEvaluatorDecimal + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: avg_window_0, _col0, _col2 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean) + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: decimal(21,6)), _col1 (type: decimal(17,2)) + sort order: +++ + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: decimal(17,2)), KEY.reducesinkkey1 (type: decimal(21,6)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query65.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/spark/query65.q.out b/ql/src/test/results/clientpositive/perf/spark/query65.q.out new file mode 100644 index 0000000..860a9ba --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/spark/query65.q.out @@ -0,0 +1,333 @@ +PREHOOK: query: explain +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1212 and 1212+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc +limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 6 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 9 + Map Operator Tree: + TableScan + alias: date_dim + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 437) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 328), Reducer 2 (PARTITION-LEVEL SORT, 328), Reducer 8 (PARTITION-LEVEL SORT, 328) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 166), Reducer 3 (PARTITION-LEVEL SORT, 166) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 437) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Map 6 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col2 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Local Work: + Map Reduce Local Work + Map 10 + Map Operator Tree: + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int), s_store_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_wholesale_cost (type: decimal(7,2)), i_brand (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Map 9 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col2 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(17,2)) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 <= (0.1 * _col4)) (type: boolean) + Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)), _col6 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col6, _col8, _col9, _col10, _col11 + Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col8 (type: string), _col2 (type: decimal(17,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: string) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(7,2)), VALUE._col2 (type: decimal(7,2)), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: decimal(21,6)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(21,6)) + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink +
