http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/perf/query72.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query72.q.out b/ql/src/test/results/clientpositive/perf/query72.q.out index 4d18fb3..7b43cc3 100644 --- a/ql/src/test/results/clientpositive/perf/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/query72.q.out @@ -61,190 +61,190 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 19 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 10 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 22 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 23 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 17 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 19 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 20 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 21 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 22 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 23 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 4 + Reducer 7 File Output Operator [FS_75] Limit [LIM_74] (rows=100 width=135) Number of rows:100 Select Operator [SEL_73] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_72] Select Operator [SEL_70] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col5"] Group By Operator [GBY_69] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_67] (rows=75451675 width=135) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col17, _col15, _col24 - Merge Join Operator [MERGEJOIN_142] (rows=75451675 width=135) - Conds:RS_63._col0, _col1=RS_64._col4, _col6(Right Outer),Output:["_col15","_col17","_col24"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col15, _col13, _col22 + Merge Join Operator [MERGEJOIN_141] (rows=75451675 width=135) + Conds:RS_63._col4, _col6=RS_64._col0, _col1(Left Outer),Output:["_col13","_col15","_col22"] + <-Map 23 [SIMPLE_EDGE] + SHUFFLE [RS_64] PartitionCols:_col0, _col1 - Select Operator [SEL_2] (rows=28798881 width=106) + Select Operator [SEL_62] (rows=28798881 width=106) Output:["_col0","_col1"] - Filter Operator [FIL_122] (rows=28798881 width=106) + Filter Operator [FIL_131] (rows=28798881 width=106) predicate:cr_item_sk is not null - TableScan [TS_0] (rows=28798881 width=106) + TableScan [TS_60] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_64] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col4, _col6 - Select Operator [SEL_62] (rows=68592431 width=135) + Select Operator [SEL_59] (rows=68592431 width=135) Output:["_col4","_col6","_col13","_col15","_col22"] - Filter Operator [FIL_61] (rows=68592431 width=135) - predicate:(UDFToDouble(_col28) > (UDFToDouble(_col17) + 5.0)) - Merge Join Operator [MERGEJOIN_141] (rows=205777295 width=135) - Conds:RS_58._col9=RS_59._col0(Inner),Output:["_col5","_col7","_col12","_col14","_col17","_col18","_col28"] - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col0 - Select Operator [SEL_47] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_132] (rows=73049 width=1119) - predicate:d_date_sk is not null - TableScan [TS_45] (rows=73049 width=1119) - default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_140] (rows=187070265 width=135) - Conds:RS_55._col0, _col18=RS_56._col0, _col1(Inner),Output:["_col5","_col7","_col9","_col12","_col14","_col17","_col18"] - <-Map 22 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0, _col1 - Select Operator [SEL_44] (rows=73049 width=1119) - Output:["_col0","_col1"] - Filter Operator [FIL_131] (rows=73049 width=1119) - predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_42] (rows=73049 width=1119) - default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col0, _col18 - Filter Operator [FIL_54] (rows=170063874 width=135) - predicate:(_col3 < _col15) - Merge Join Operator [MERGEJOIN_139] (rows=510191624 width=135) - Conds:RS_51._col1=RS_52._col6(Inner),Output:["_col0","_col3","_col5","_col7","_col9","_col12","_col14","_col15","_col17","_col18"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col6 - Select Operator [SEL_41] (rows=463810558 width=135) - Output:["_col1","_col3","_col6","_col8","_col9","_col11","_col12"] - Merge Join Operator [MERGEJOIN_138] (rows=463810558 width=135) + Merge Join Operator [MERGEJOIN_140] (rows=68592431 width=135) + Conds:RS_56._col0, _col20=RS_57._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col20"] + <-Map 22 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col0, _col1 + Select Operator [SEL_48] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_130] (rows=73049 width=1119) + predicate:(d_date_sk is not null and d_week_seq is not null) + TableScan [TS_46] (rows=73049 width=1119) + default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0, _col20 + Filter Operator [FIL_55] (rows=62356755 width=135) + predicate:(_col3 < _col17) + Merge Join Operator [MERGEJOIN_139] (rows=187070265 width=135) + Conds:RS_52._col1=RS_53._col8(Inner),Output:["_col0","_col3","_col5","_col9","_col14","_col16","_col17","_col20"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col8 + Select Operator [SEL_45] (rows=170063874 width=135) + Output:["_col3","_col8","_col10","_col11","_col14"] + Filter Operator [FIL_44] (rows=170063874 width=135) + predicate:(UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0)) + Merge Join Operator [MERGEJOIN_138] (rows=510191624 width=135) + Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col18","_col20"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_25] (rows=73049 width=1119) + Output:["_col0","_col1"] + Filter Operator [FIL_129] (rows=73049 width=1119) + predicate:d_date_sk is not null + TableScan [TS_23] (rows=73049 width=1119) + default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_137] (rows=463810558 width=135) Conds:RS_38._col4=RS_39._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col18"] - <-Map 21 [SIMPLE_EDGE] + <-Map 20 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col0 - Select Operator [SEL_25] (rows=462000 width=1436) + Select Operator [SEL_22] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_130] (rows=462000 width=1436) + Filter Operator [FIL_128] (rows=462000 width=1436) predicate:i_item_sk is not null - TableScan [TS_23] (rows=462000 width=1436) + TableScan [TS_20] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] - <-Reducer 15 [SIMPLE_EDGE] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_137] (rows=421645953 width=135) + Merge Join Operator [MERGEJOIN_136] (rows=421645953 width=135) Conds:RS_35._col5=RS_36._col0(Left Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10"] - <-Map 20 [SIMPLE_EDGE] + <-Map 19 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col0 - Select Operator [SEL_22] (rows=2300 width=1179) + Select Operator [SEL_19] (rows=2300 width=1179) Output:["_col0"] - TableScan [TS_21] (rows=2300 width=1179) + TableScan [TS_18] (rows=2300 width=1179) default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] - <-Reducer 14 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_136] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_135] (rows=383314495 width=135) Conds:RS_32._col3=RS_33._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 19 [SIMPLE_EDGE] + <-Map 18 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col0 - Select Operator [SEL_20] (rows=3600 width=107) + Select Operator [SEL_17] (rows=3600 width=107) Output:["_col0"] - Filter Operator [FIL_128] (rows=3600 width=107) + Filter Operator [FIL_126] (rows=3600 width=107) predicate:((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) - TableScan [TS_18] (rows=7200 width=107) + TableScan [TS_15] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 13 [SIMPLE_EDGE] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_135] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_134] (rows=348467716 width=135) Conds:RS_29._col2=RS_30._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 18 [SIMPLE_EDGE] + <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0 - Select Operator [SEL_17] (rows=930900 width=385) + Select Operator [SEL_14] (rows=930900 width=385) Output:["_col0"] - Filter Operator [FIL_127] (rows=930900 width=385) + Filter Operator [FIL_125] (rows=930900 width=385) predicate:((cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=385) + TableScan [TS_12] (rows=1861800 width=385) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_134] (rows=316788826 width=135) + Merge Join Operator [MERGEJOIN_133] (rows=316788826 width=135) Conds:RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_125] (rows=287989836 width=135) - predicate:(cs_item_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk is not null and cs_ship_date_sk is not null) - TableScan [TS_9] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] - <-Map 17 [SIMPLE_EDGE] + <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0 - Select Operator [SEL_14] (rows=36524 width=1119) + Select Operator [SEL_11] (rows=36524 width=1119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_126] (rows=36524 width=1119) + Filter Operator [FIL_124] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null and d_week_seq is not null) - TableScan [TS_12] (rows=73049 width=1119) + TableScan [TS_9] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_week_seq","d_year"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_133] (rows=41342400 width=15) - Conds:RS_48._col2=RS_49._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=27 width=1029) - Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=27 width=1029) - predicate:w_warehouse_sk is not null - TableScan [TS_6] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Map 5 [SIMPLE_EDGE] - SHUFFLE [RS_48] - PartitionCols:_col2 - Select Operator [SEL_5] (rows=37584000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_123] (rows=37584000 width=15) - predicate:(inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) - TableScan [TS_3] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_123] (rows=287989836 width=135) + predicate:(cs_item_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk is not null and cs_ship_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_132] (rows=41342400 width=15) + Conds:RS_49._col2=RS_50._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_121] (rows=37584000 width=15) + predicate:(inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) + TableScan [TS_0] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=27 width=1029) + Output:["_col0","_col1"] + Filter Operator [FIL_122] (rows=27 width=1029) + predicate:w_warehouse_sk is not null + TableScan [TS_3] (rows=27 width=1029) + default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"]
http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/perf/query85.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query85.q.out b/ql/src/test/results/clientpositive/perf/query85.q.out index f188fa2..ebc4ac4 100644 --- a/ql/src/test/results/clientpositive/perf/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/query85.q.out @@ -197,9 +197,9 @@ Stage-0 SHUFFLE [RS_49] PartitionCols:_col0 Group By Operator [GBY_48] (rows=2047980 width=385) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col5)","avg(_col17)","avg(_col16)"],keys:_col19 + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col12)","avg(_col7)","avg(_col6)"],keys:_col22 Merge Join Operator [MERGEJOIN_106] (rows=2047980 width=385) - Conds:RS_44._col13, _col24, _col25=RS_45._col0, _col1, _col2(Inner),Output:["_col5","_col16","_col17","_col19"] + Conds:RS_44._col3, _col24, _col25=RS_45._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"] <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0, _col1, _col2 @@ -211,90 +211,90 @@ Stage-0 default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_44] - PartitionCols:_col13, _col24, _col25 + PartitionCols:_col3, _col24, _col25 Filter Operator [FIL_43] (rows=393687 width=135) - predicate:(((_col24 = 'M') and (_col25 = '4 yr Degree') and _col6 BETWEEN 100 AND 150) or ((_col24 = 'D') and (_col25 = 'Primary') and _col6 BETWEEN 50 AND 100) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200)) - Merge Join Operator [MERGEJOIN_105] (rows=4724247 width=135) - Conds:RS_40._col11=RS_41._col0(Inner),Output:["_col5","_col6","_col13","_col16","_col17","_col19","_col24","_col25"] + predicate:(((_col24 = 'M') and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) + Merge Join Operator [MERGEJOIN_105] (rows=4724246 width=135) + Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"] <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_20] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_40] - PartitionCols:_col11 - Filter Operator [FIL_39] (rows=4294770 width=135) - predicate:(((_col21) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 100 AND 200) or ((_col21) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 300) or ((_col21) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_104] (rows=25768635 width=135) - Conds:RS_36._col12=RS_37._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col13","_col16","_col17","_col19","_col21"] - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col0 - Select Operator [SEL_17] (rows=10000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_97] (rows=10000000 width=1014) - predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_15] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col12 - Merge Join Operator [MERGEJOIN_103] (rows=23426032 width=135) - Conds:RS_33._col14=RS_34._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col16","_col17","_col19"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=72 width=200) - Output:["_col0","_col1"] - Filter Operator [FIL_96] (rows=72 width=200) - predicate:r_reason_sk is not null - TableScan [TS_12] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col14 - Merge Join Operator [MERGEJOIN_102] (rows=21296393 width=135) - Conds:RS_30._col2, _col4=RS_31._col0, _col5(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col16","_col17"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col0, _col5 - Select Operator [SEL_11] (rows=14398467 width=92) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_95] (rows=14398467 width=92) - predicate:(wr_item_sk is not null and wr_order_number is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null and wr_refunded_addr_sk is not null and wr_reason_sk is not null) - TableScan [TS_9] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col2, _col4 + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_104] (rows=4294769 width=135) + Conds:RS_37._col4=RS_38._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col12","_col13","_col22"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=72 width=200) + Output:["_col0","_col1"] + Filter Operator [FIL_97] (rows=72 width=200) + predicate:r_reason_sk is not null + TableScan [TS_15] (rows=72 width=200) + default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_103] (rows=3904336 width=135) + Conds:RS_34._col8=RS_35._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col12","_col13"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_96] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col8 + Merge Join Operator [MERGEJOIN_102] (rows=3549397 width=135) + Conds:RS_31._col10=RS_32._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col12","_col13"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=4602 width=585) + Output:["_col0"] + Filter Operator [FIL_95] (rows=4602 width=585) + predicate:wp_web_page_sk is not null + TableScan [TS_9] (rows=4602 width=585) + default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col10 + Filter Operator [FIL_30] (rows=3226725 width=135) + predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col14 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col14 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col14 BETWEEN 50 AND 250)) Merge Join Operator [MERGEJOIN_101] (rows=19360357 width=135) - Conds:RS_27._col1=RS_28._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + Conds:RS_27._col2=RS_28._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14","_col16"] <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 - Select Operator [SEL_8] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_94] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + Select Operator [SEL_8] (rows=10000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_94] (rows=10000000 width=1014) + predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_27] - PartitionCols:_col1 + PartitionCols:_col2 Merge Join Operator [MERGEJOIN_100] (rows=17600325 width=135) - Conds:RS_24._col0=RS_25._col2(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7"] + Conds:RS_24._col0, _col5=RS_25._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_24] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=4602 width=585) - Output:["_col0"] - Filter Operator [FIL_92] (rows=4602 width=585) - predicate:wp_web_page_sk is not null - TableScan [TS_0] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + PartitionCols:_col0, _col5 + Select Operator [SEL_2] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_92] (rows=14398467 width=92) + predicate:(wr_item_sk is not null and wr_order_number is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null and wr_refunded_addr_sk is not null and wr_reason_sk is not null) + TableScan [TS_0] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] <-Map 11 [SIMPLE_EDGE] SHUFFLE [RS_25] - PartitionCols:_col2 + PartitionCols:_col1, _col3 Select Operator [SEL_5] (rows=16000296 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] Filter Operator [FIL_93] (rows=16000296 width=135) http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index cd24cbb..02d1605 100644 --- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -252,10 +252,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -326,10 +326,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -400,10 +400,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -684,10 +684,10 @@ STAGE PLANS: 1 _col1 (type: string), _col0 (type: int) 2 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -752,10 +752,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 48 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -831,10 +831,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -899,10 +899,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 48 Data size: 9312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -967,10 +967,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 54 Data size: 1746 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 54 Data size: 9506 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 54 Data size: 1746 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 54 Data size: 9506 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index f1728ce..b1ee89c 100644 --- a/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ b/ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -390,9 +390,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -408,115 +408,117 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 5 + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan - alias: p2 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 6 + value expressions: _col1 (type: string) + Map 5 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan - alias: p4 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col3 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -542,9 +544,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -560,115 +562,117 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 5 + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan - alias: p2 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 6 + value expressions: _col1 (type: string) + Map 5 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 7 Map Operator Tree: TableScan - alias: p4 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string) - outputColumnNames: _col0, _col1 + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string) + value expressions: _col3 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col1, _col3, _col5, _col6 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) + expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/ff30a1eb/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index c3b805a..c978857 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -150,30 +150,30 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 5 + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -186,29 +186,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 6 + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 5 Map Operator Tree: TableScan - alias: p3 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 7 Map Operator Tree: TableScan @@ -233,33 +233,38 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col0 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 0 _col1 (type: string) + 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col10 (type: string) - sort order: + - Map-reduce partition columns: _col10 (type: string) + key expressions: _col9 (type: int), _col10 (type: string) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col10 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + 0 _col9 (type: int), _col10 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) - Reducer 4 + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -267,15 +272,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator