http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/perf/query81.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query81.q.out b/ql/src/test/results/clientpositive/perf/query81.q.out index 8ca0068..25bd68e 100644 --- a/ql/src/test/results/clientpositive/perf/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/query81.q.out @@ -1,4 +1,3 @@ -Warning: Shuffle Join MERGEJOIN[154][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 22' is a cross product PREHOOK: query: explain with customer_total_return as (select cr_returning_customer_sk as ctr_customer_sk ,ca_state as ctr_state, @@ -60,249 +59,163 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE), Map 30 (CUSTOM_SIMPLE_EDGE), Reducer 27 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) -Reducer 26 <- Map 29 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Map 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 - File Output Operator [FS_102] - Limit [LIM_101] (rows=100 width=1) + Reducer 4 + File Output Operator [FS_67] + Limit [LIM_66] (rows=100 width=860) Number of rows:100 - Select Operator [SEL_100] (rows=930023387364950016 width=1) + Select Operator [SEL_65] (rows=32266667 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_99] - Select Operator [SEL_98] (rows=930023387364950016 width=1) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_64] + Select Operator [SEL_63] (rows=32266667 width=860) Output:["_col0","_col1","_col11","_col12","_col13","_col14","_col15","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_97] (rows=930023387364950016 width=1) + Filter Operator [FIL_62] (rows=32266667 width=860) predicate:(_col2 > CASE WHEN (_col22 is null) THEN (null) ELSE (_col21) END) - Select Operator [SEL_96] (rows=2790070162094850048 width=1) + Select Operator [SEL_61] (rows=96800003 width=860) Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col16","_col18","_col19","_col20","_col21","_col22"] - Merge Join Operator [MERGEJOIN_162] (rows=2790070162094850048 width=1) - Conds:RS_93._col19=RS_94._col2(Left Outer),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20","_col21","_col22"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col2 - Select Operator [SEL_86] (rows=2536427365110644736 width=1) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_85] (rows=2536427365110644736 width=1) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_84] - PartitionCols:_col0 - Group By Operator [GBY_83] (rows=5072854730221289472 width=1) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col3 - Select Operator [SEL_82] (rows=5072854730221289472 width=1) - Output:["_col3","_col2"] - Merge Join Operator [MERGEJOIN_161] (rows=5072854730221289472 width=1) - Conds:RS_79._col1=RS_80._col0(Inner),Output:["_col2","_col3"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_79] - PartitionCols:_col1 - Select Operator [SEL_45] (rows=22000000 width=1014) - Output:["_col1","_col2"] - Group By Operator [GBY_44] (rows=22000000 width=1014) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0, _col1 - Group By Operator [GBY_42] (rows=44000000 width=1014) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 - Select Operator [SEL_41] (rows=44000000 width=1014) - Output:["_col7","_col1","_col3"] - Merge Join Operator [MERGEJOIN_159] (rows=44000000 width=1014) - Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] - <-Map 20 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col0 - Select Operator [SEL_34] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_148] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_32] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_158] (rows=31678769 width=106) - Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col0 - Select Operator [SEL_28] (rows=28798881 width=106) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_146] (rows=28798881 width=106) - predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) - TableScan [TS_26] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] - <-Map 19 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0 - Select Operator [SEL_31] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_147] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_29] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_80] + Merge Join Operator [MERGEJOIN_105] (rows=96800003 width=860) + Conds:RS_58._col0=RS_59._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col20","_col21","_col22"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_104] (rows=24200000 width=1014) + Conds:RS_51._col1=RS_52._col2(Left Outer),Output:["_col0","_col2","_col3","_col4"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col2 + Select Operator [SEL_50] (rows=8711661 width=106) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_49] (rows=8711661 width=106) + Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 + Select Operator [SEL_45] (rows=17423323 width=106) + Output:["_col0","_col2"] + Group By Operator [GBY_44] (rows=17423323 width=106) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col0 - Group By Operator [GBY_77] (rows=4611686018427387903 width=1) - Output:["_col0"],keys:KEY._col0 - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_76] + Group By Operator [GBY_42] (rows=34846646 width=106) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_41] (rows=34846646 width=106) + Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_103] (rows=34846646 width=106) + Conds:RS_38._col2=RS_39._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_98] (rows=20000000 width=1014) + predicate:((ca_state = ca_state) and ca_address_sk is not null) + TableScan [TS_32] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_102] (rows=31678769 width=106) + Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_96] (rows=28798881 width=106) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) + TableScan [TS_26] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_97] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_29] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col1 + Select Operator [SEL_25] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_24] (rows=22000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=44000000 width=1014) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col7, _col1 + Select Operator [SEL_21] (rows=44000000 width=1014) + Output:["_col7","_col1","_col3"] + Merge Join Operator [MERGEJOIN_101] (rows=44000000 width=1014) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Group By Operator [GBY_75] (rows=9223372036854775807 width=1) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_154] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),Output:["_col2"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_70] - Select Operator [SEL_47] (rows=80000000 width=4) - TableScan [TS_46] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE - <-Map 30 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_72] - Select Operator [SEL_69] (rows=40000000 width=4) - TableScan [TS_68] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_71] - Select Operator [SEL_67] (rows=22000000 width=1014) - Output:["_col1"] - Group By Operator [GBY_66] (rows=22000000 width=1014) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_65] - PartitionCols:_col0, _col1 - Group By Operator [GBY_64] (rows=44000000 width=1014) - Output:["_col0","_col1"],keys:_col7, _col1 - Select Operator [SEL_63] (rows=44000000 width=1014) - Output:["_col7","_col1"] - Merge Join Operator [MERGEJOIN_153] (rows=44000000 width=1014) - Conds:RS_60._col2=RS_61._col0(Inner),Output:["_col1","_col7"] - <-Map 29 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0 - Select Operator [SEL_56] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_54] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_152] (rows=31678769 width=106) - Conds:RS_57._col0=RS_58._col0(Inner),Output:["_col1","_col2"] - <-Map 24 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col0 - Select Operator [SEL_50] (rows=28798881 width=106) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_149] (rows=28798881 width=106) - predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null) - TableScan [TS_48] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk"] - <-Map 28 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0 - Select Operator [SEL_53] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_150] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_51] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_93] - PartitionCols:_col19 - Merge Join Operator [MERGEJOIN_160] (rows=96800003 width=860) - Conds:RS_90._col0=RS_91._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17","_col19","_col20"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_91] + Select Operator [SEL_14] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_95] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_12] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_100] (rows=31678769 width=106) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_94] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_9] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 6 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_93] (rows=28798881 width=106) + predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) + TableScan [TS_6] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_99] (rows=88000001 width=860) + Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_91] (rows=80000000 width=860) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col0 - Group By Operator [GBY_24] (rows=22000000 width=1014) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=44000000 width=1014) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col1, _col7 - Select Operator [SEL_21] (rows=44000000 width=1014) - Output:["_col1","_col7","_col3"] - Merge Join Operator [MERGEJOIN_157] (rows=44000000 width=1014) - Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col1","_col3","_col7"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_145] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_156] (rows=31678769 width=106) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_144] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=28798881 width=106) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_143] (rows=28798881 width=106) - predicate:(cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) - TableScan [TS_6] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_90] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_155] (rows=88000001 width=860) - Conds:RS_87._col2=RS_88._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_87] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_141] (rows=80000000 width=860) - predicate:(c_customer_sk is not null and c_current_addr_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] - <-Map 6 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=20000000 width=1014) - Output:["_col0","_col1","_col10","_col11","_col2","_col3","_col4","_col5","_col6","_col8","_col9"] - Filter Operator [FIL_142] (rows=20000000 width=1014) - predicate:((ca_state = 'IL') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] + Select Operator [SEL_5] (rows=20000000 width=1014) + Output:["_col0","_col1","_col10","_col11","_col2","_col3","_col4","_col5","_col6","_col8","_col9"] + Filter Operator [FIL_92] (rows=20000000 width=1014) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"]
http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/semijoin5.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/semijoin5.q.out b/ql/src/test/results/clientpositive/semijoin5.q.out index 07b7470..fd8e372 100644 --- a/ql/src/test/results/clientpositive/semijoin5.q.out +++ b/ql/src/test/results/clientpositive/semijoin5.q.out @@ -48,14 +48,11 @@ WHERE (t2.smallint_col_19) IN (SELECT POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-8 + Stage-2 depends on stages: Stage-1, Stage-7 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 - Stage-9 is a root stage - Stage-10 depends on stages: Stage-9 - Stage-6 depends on stages: Stage-10 + Stage-6 is a root stage Stage-7 depends on stages: Stage-6 - Stage-8 depends on stages: Stage-7 Stage-0 depends on stages: Stage-4 STAGE PLANS: @@ -236,149 +233,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-9 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan - alias: t1 + alias: tt1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (tinyint_col_3 is not null and bigint_col_7 is not null and decimal2016_col_26 is not null) (type: boolean) + predicate: decimal2612_col_77 is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: tinyint_col_3 (type: tinyint), bigint_col_7 (type: bigint), timestamp_col_9 (type: timestamp), decimal2016_col_26 (type: decimal(20,16)) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: decimal2612_col_77 (type: decimal(26,12)) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint) - sort order: +++ - Map-reduce partition columns: _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint) + key expressions: _col0 (type: decimal(26,12)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(26,12)) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: timestamp) - TableScan - alias: t2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (tinyint_col_20 is not null and decimal2709_col_9 is not null and tinyint_col_15 is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: decimal2709_col_9 (type: decimal(27,9)), tinyint_col_15 (type: tinyint), tinyint_col_20 (type: tinyint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint) - sort order: +++ - Map-reduce partition columns: _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: tinyint), _col3 (type: decimal(34,16)), _col1 (type: bigint) - 1 _col2 (type: tinyint), _col0 (type: decimal(34,16)), UDFToLong(_col1) (type: bigint) - outputColumnNames: _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col2 (type: timestamp) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: timestamp) - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-6 - Map Reduce - Map Operator Tree: TableScan alias: tt2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: decimal1911_col_16 is not null (type: boolean) + predicate: ((timestamp_col_18 = timestamp_col_18) and decimal1911_col_16 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: timestamp) - sort order: + - Map-reduce partition columns: _col1 (type: timestamp) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: decimal(19,11)) - TableScan - Reduce Output Operator - key expressions: _col0 (type: timestamp) - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: timestamp) - 1 _col0 (type: timestamp) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: decimal(26,12)) - sort order: + - Map-reduce partition columns: _col0 (type: decimal(26,12)) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: timestamp) - TableScan - alias: tt1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: decimal2612_col_77 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: decimal2612_col_77 (type: decimal(26,12)) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator key expressions: _col0 (type: decimal(26,12)) sort order: + Map-reduce partition columns: _col0 (type: decimal(26,12)) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: timestamp) Reduce Operator Tree: Join Operator condition map: @@ -400,7 +288,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out index 5313e9b..3467215 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out @@ -95,9 +95,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 4 (PARTITION-LEVEL SORT, 4) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 4), Reducer 6 (PARTITION-LEVEL SORT, 4) - Reducer 6 <- Map 5 (GROUP, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -124,37 +122,22 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (l_shipmode = 'AIR') (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + predicate: ((l_shipmode = 'AIR') and (l_linenumber = l_linenumber)) (type: boolean) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Map 5 - Map Operator Tree: - TableScan - alias: li - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_linenumber (type: int) - outputColumnNames: l_linenumber - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_linenumber (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -164,54 +147,18 @@ STAGE PLANS: 0 _col0 (type: int), 1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/bddf5a7a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out index bc25efe..1901dba 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -24,9 +24,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -49,36 +47,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value > 'val_9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((value = value) and (key = key) and (value > 'val_9')) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -96,42 +80,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -274,9 +222,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -299,34 +245,23 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string) - mode: hash + Filter Operator + predicate: (value = value) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -336,50 +271,14 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator
