http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/perf/query94.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query94.q.out b/ql/src/test/results/clientpositive/perf/query94.q.out index 6e24345..c5fc9e7 100644 --- a/ql/src/test/results/clientpositive/perf/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/query94.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[107][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 17' is a cross product +Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: explain select count(distinct ws_order_number) as `order count` @@ -58,174 +58,182 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 14 (CUSTOM_SIMPLE_EDGE), Map 19 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE), Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 + Reducer 9 File Output Operator [FS_74] Limit [LIM_72] (rows=1 width=344) Number of rows:100 Select Operator [SEL_71] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] + <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_70] Select Operator [SEL_69] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_68] (rows=1 width=344) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col0:0._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_67] - Group By Operator [GBY_66] (rows=1395035081047425024 width=1) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT _col4)","sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_65] (rows=1395035081047425024 width=1) - Output:["_col4","_col5","_col6"] - Filter Operator [FIL_64] (rows=1395035081047425024 width=1) - predicate:_col16 is null - Select Operator [SEL_63] (rows=2790070162094850048 width=1) - Output:["_col4","_col5","_col6","_col16"] - Merge Join Operator [MERGEJOIN_113] (rows=2790070162094850048 width=1) - Conds:RS_60._col3, _col4=RS_61._col0, _col1(Inner),Output:["_col4","_col5","_col6","_col14"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0, _col1 - Group By Operator [GBY_46] (rows=2536427365110644736 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0, _col1 - Group By Operator [GBY_44] (rows=5072854730221289472 width=1) - Output:["_col0","_col1"],keys:_col2, _col3 - Select Operator [SEL_43] (rows=5072854730221289472 width=1) - Output:["_col2","_col3"] - Filter Operator [FIL_42] (rows=5072854730221289472 width=1) - predicate:(_col2 <> _col0) - Merge Join Operator [MERGEJOIN_111] (rows=5072854730221289472 width=1) - Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"] - <-Map 14 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_39] - PartitionCols:_col1 - Select Operator [SEL_20] (rows=144002668 width=135) - Output:["_col0","_col1"] - TableScan [TS_19] (rows=144002668 width=135) - default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Select Operator [SEL_38] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=9223372036854775807 width=1) - Output:["_col0","_col1"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_107] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_32] - Select Operator [SEL_28] (rows=144002668 width=135) - Output:["_col0","_col1"] - Please refer to the previous TableScan [TS_19] - <-Map 19 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - Select Operator [SEL_22] (rows=73049 width=4) - TableScan [TS_21] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE - <-Map 20 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_30] - Select Operator [SEL_24] (rows=84 width=4) - TableScan [TS_23] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE - <-Map 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Select Operator [SEL_26] (rows=40000000 width=4) - TableScan [TS_25] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col3, _col4 - Merge Join Operator [MERGEJOIN_112] (rows=210834322 width=135) - Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=7199233 width=92) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=7199233 width=92) - Output:["_col0"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_16] + Group By Operator [GBY_112] (rows=1 width=344) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_111] + Group By Operator [GBY_110] (rows=1 width=344) + Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] + Group By Operator [GBY_109] (rows=1395035081047425024 width=1) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_108] + PartitionCols:_col0 + Group By Operator [GBY_107] (rows=1395035081047425024 width=1) + Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 + Select Operator [SEL_65] (rows=1395035081047425024 width=1) + Output:["_col4","_col5","_col6"] + Filter Operator [FIL_64] (rows=1395035081047425024 width=1) + predicate:_col16 is null + Select Operator [SEL_63] (rows=2790070162094850048 width=1) + Output:["_col4","_col5","_col6","_col16"] + Merge Join Operator [MERGEJOIN_119] (rows=2790070162094850048 width=1) + Conds:RS_60._col3, _col4=RS_61._col0, _col1(Inner),Output:["_col4","_col5","_col6","_col14"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0, _col1 + Group By Operator [GBY_46] (rows=2536427365110644736 width=1) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0, _col1 + Group By Operator [GBY_44] (rows=5072854730221289472 width=1) + Output:["_col0","_col1"],keys:_col2, _col3 + Select Operator [SEL_43] (rows=5072854730221289472 width=1) + Output:["_col2","_col3"] + Filter Operator [FIL_42] (rows=5072854730221289472 width=1) + predicate:(_col2 <> _col0) + Merge Join Operator [MERGEJOIN_117] (rows=5072854730221289472 width=1) + Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"] + <-Map 15 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_39] + PartitionCols:_col1 + Select Operator [SEL_20] (rows=144002668 width=135) + Output:["_col0","_col1"] + TableScan [TS_19] (rows=144002668 width=135) + default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col1 + Select Operator [SEL_38] (rows=4611686018427387903 width=1) + Output:["_col0","_col1"] + Group By Operator [GBY_37] (rows=4611686018427387903 width=1) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=9223372036854775807 width=1) + Output:["_col0","_col1"],keys:_col4, _col3 + Merge Join Operator [MERGEJOIN_113] (rows=9223372036854775807 width=1) + Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_32] + Select Operator [SEL_28] (rows=144002668 width=135) + Output:["_col0","_col1"] + Please refer to the previous TableScan [TS_19] + <-Map 20 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_29] + Select Operator [SEL_22] (rows=73049 width=4) + TableScan [TS_21] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE + <-Map 21 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_30] + Select Operator [SEL_24] (rows=84 width=4) + TableScan [TS_23] (rows=84 width=1850) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE + <-Map 22 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_31] + Select Operator [SEL_26] (rows=40000000 width=4) + TableScan [TS_25] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col3, _col4 + Merge Join Operator [MERGEJOIN_118] (rows=210834322 width=135) + Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_58] PartitionCols:_col0 - Group By Operator [GBY_15] (rows=14398467 width=92) - Output:["_col0"],keys:wr_order_number - Filter Operator [FIL_104] (rows=14398467 width=92) - predicate:wr_order_number is not null - TableScan [TS_12] (rows=14398467 width=92) - default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_110] (rows=191667562 width=135) - Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=42 width=1850) - Output:["_col0"] - Filter Operator [FIL_103] (rows=42 width=1850) - predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_109] (rows=174243235 width=135) - Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_102] (rows=20000000 width=1014) - predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_108] (rows=158402938 width=135) - Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_48] + Select Operator [SEL_18] (rows=7199233 width=92) + Output:["_col0","_col1"] + Group By Operator [GBY_17] (rows=7199233 width=92) + Output:["_col0"],keys:KEY._col0 + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_2] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_100] (rows=144002668 width=135) - predicate:(ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_101] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Group By Operator [GBY_15] (rows=14398467 width=92) + Output:["_col0"],keys:wr_order_number + Filter Operator [FIL_104] (rows=14398467 width=92) + predicate:wr_order_number is not null + TableScan [TS_12] (rows=14398467 width=92) + default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_116] (rows=191667562 width=135) + Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=42 width=1850) + Output:["_col0"] + Filter Operator [FIL_103] (rows=42 width=1850) + predicate:((web_company_name = 'pri') and web_site_sk is not null) + TableScan [TS_9] (rows=84 width=1850) + default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_115] (rows=174243235 width=135) + Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_102] (rows=20000000 width=1014) + predicate:((ca_state = 'TX') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_114] (rows=158402938 width=135) + Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=144002668 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_100] (rows=144002668 width=135) + predicate:(ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_101] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/perf/query95.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query95.q.out b/ql/src/test/results/clientpositive/perf/query95.q.out index 91d874e..332bef8 100644 --- a/ql/src/test/results/clientpositive/perf/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/query95.q.out @@ -63,169 +63,177 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 12 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 16 <- Map 19 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 + Reducer 8 File Output Operator [FS_71] Limit [LIM_69] (rows=1 width=344) Number of rows:100 Select Operator [SEL_68] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_67] Select Operator [SEL_66] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_65] (rows=1 width=344) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col0:0._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_64] - Group By Operator [GBY_63] (rows=421668645 width=135) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT _col3)","sum(_col4)","sum(_col5)"],keys:_col3 - Merge Join Operator [MERGEJOIN_121] (rows=421668645 width=135) - Conds:RS_58._col3=RS_59._col0(Inner),RS_58._col3=RS_60._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_59] + Group By Operator [GBY_120] (rows=1 width=344) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_119] + Group By Operator [GBY_118] (rows=1 width=344) + Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] + Group By Operator [GBY_117] (rows=421668645 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_116] PartitionCols:_col0 - Group By Operator [GBY_25] (rows=79201469 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col0 - Group By Operator [GBY_23] (rows=158402938 width=135) - Output:["_col0"],keys:_col1 - Select Operator [SEL_22] (rows=158402938 width=135) - Output:["_col1"] - Filter Operator [FIL_21] (rows=158402938 width=135) - predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_118] (rows=158402938 width=135) - Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Select Operator [SEL_14] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_110] (rows=144002668 width=135) - predicate:ws_order_number is not null - TableScan [TS_12] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col1 - Select Operator [SEL_17] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_111] (rows=144002668 width=135) - predicate:ws_order_number is not null - TableScan [TS_15] (rows=144002668 width=135) - default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col0 - Group By Operator [GBY_47] (rows=87121617 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col0 - Group By Operator [GBY_45] (rows=174243235 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_120] (rows=174243235 width=135) - Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1"] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_42] + Group By Operator [GBY_115] (rows=421668645 width=135) + Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 + Merge Join Operator [MERGEJOIN_127] (rows=421668645 width=135) + Conds:RS_58._col3=RS_59._col0(Inner),RS_58._col3=RS_60._col0(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col0 + Group By Operator [GBY_25] (rows=79201469 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0 - Select Operator [SEL_40] (rows=14398467 width=92) - Output:["_col0"] - Filter Operator [FIL_114] (rows=14398467 width=92) - predicate:wr_order_number is not null - TableScan [TS_38] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_41] + Group By Operator [GBY_23] (rows=158402938 width=135) + Output:["_col0"],keys:_col1 + Select Operator [SEL_22] (rows=158402938 width=135) + Output:["_col1"] + Filter Operator [FIL_21] (rows=158402938 width=135) + predicate:(_col0 <> _col2) + Merge Join Operator [MERGEJOIN_124] (rows=158402938 width=135) + Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Select Operator [SEL_14] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_110] (rows=144002668 width=135) + predicate:ws_order_number is not null + TableScan [TS_12] (rows=144002668 width=135) + default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col1 + Select Operator [SEL_17] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_111] (rows=144002668 width=135) + predicate:ws_order_number is not null + TableScan [TS_15] (rows=144002668 width=135) + default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_47] (rows=87121617 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0 + Group By Operator [GBY_45] (rows=174243235 width=135) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_126] (rows=174243235 width=135) + Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1"] + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Select Operator [SEL_40] (rows=14398467 width=92) + Output:["_col0"] + Filter Operator [FIL_114] (rows=14398467 width=92) + predicate:wr_order_number is not null + TableScan [TS_38] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Select Operator [SEL_37] (rows=158402938 width=135) + Output:["_col0"] + Filter Operator [FIL_36] (rows=158402938 width=135) + predicate:(_col0 <> _col2) + Merge Join Operator [MERGEJOIN_125] (rows=158402938 width=135) + Conds:RS_33._col1=RS_34._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1 + Select Operator [SEL_29] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_112] (rows=144002668 width=135) + predicate:ws_order_number is not null + Please refer to the previous TableScan [TS_12] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col1 + Select Operator [SEL_32] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_113] (rows=144002668 width=135) + predicate:ws_order_number is not null + Please refer to the previous TableScan [TS_15] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_123] (rows=191667562 width=135) + Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col3","_col4","_col5"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col0 - Select Operator [SEL_37] (rows=158402938 width=135) + Select Operator [SEL_11] (rows=42 width=1850) Output:["_col0"] - Filter Operator [FIL_36] (rows=158402938 width=135) - predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_119] (rows=158402938 width=135) - Conds:RS_33._col1=RS_34._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col1 - Select Operator [SEL_29] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_112] (rows=144002668 width=135) - predicate:ws_order_number is not null - Please refer to the previous TableScan [TS_12] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col1 - Select Operator [SEL_32] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_113] (rows=144002668 width=135) - predicate:ws_order_number is not null - Please refer to the previous TableScan [TS_15] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_117] (rows=191667562 width=135) - Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col3","_col4","_col5"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=42 width=1850) - Output:["_col0"] - Filter Operator [FIL_109] (rows=42 width=1850) - predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_116] (rows=174243235 width=135) - Conds:RS_52._col1=RS_53._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_108] (rows=20000000 width=1014) - predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_115] (rows=158402938 width=135) - Conds:RS_49._col0=RS_50._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_106] (rows=144002668 width=135) - predicate:(ws_order_number is not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_107] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Filter Operator [FIL_109] (rows=42 width=1850) + predicate:((web_company_name = 'pri') and web_site_sk is not null) + TableScan [TS_9] (rows=84 width=1850) + default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_122] (rows=174243235 width=135) + Conds:RS_52._col1=RS_53._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_108] (rows=20000000 width=1014) + predicate:((ca_state = 'TX') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_121] (rows=158402938 width=135) + Conds:RS_49._col0=RS_50._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=144002668 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_106] (rows=144002668 width=135) + predicate:(ws_order_number is not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_107] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/spark/nullgroup4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/nullgroup4.q.out b/ql/src/test/results/clientpositive/spark/nullgroup4.q.out index 24f0291..63afe9b 100644 --- a/ql/src/test/results/clientpositive/spark/nullgroup4.q.out +++ b/ql/src/test/results/clientpositive/spark/nullgroup4.q.out @@ -95,7 +95,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -111,26 +112,44 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1), count(DISTINCT _col1) + aggregations: count(1) keys: _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0) + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partial2 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1), count(_col0) + mode: partial2 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/udf_count.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_count.q.out b/ql/src/test/results/clientpositive/udf_count.q.out index f60ad04..c3903b2 100644 --- a/ql/src/test/results/clientpositive/udf_count.q.out +++ b/ql/src/test/results/clientpositive/udf_count.q.out @@ -43,7 +43,8 @@ POSTHOOK: query: EXPLAIN SELECT count(DISTINCT key) FROM src POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -57,24 +58,50 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT key) keys: key (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col0:0._col0) + keys: KEY._col0 (type: string) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/b560f492/ql/src/test/results/clientpositive/vector_empty_where.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_empty_where.q.out b/ql/src/test/results/clientpositive/vector_empty_where.q.out index b2dec6d..b7580f3 100644 --- a/ql/src/test/results/clientpositive/vector_empty_where.q.out +++ b/ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -10,7 +10,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -38,26 +39,25 @@ STAGE PLANS: projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2) -> bigint className: VectorGroupByOperator vectorOutput: true keyExpressions: col 2 native: false - projectedOutputColumns: [0] + projectedOutputColumns: [] keys: cint (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -74,17 +74,72 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col0:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: int) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) Group By Vectorization: vectorOutput: false native: false projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -117,7 +172,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -137,26 +193,25 @@ STAGE PLANS: predicate: cint (type: int) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2) -> bigint className: VectorGroupByOperator vectorOutput: true keyExpressions: col 2 native: false - projectedOutputColumns: [0] + projectedOutputColumns: [] keys: cint (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -173,17 +228,72 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col0:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: int) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) Group By Vectorization: vectorOutput: false native: false projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -216,7 +326,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -244,26 +355,25 @@ STAGE PLANS: projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2) -> bigint className: VectorGroupByOperator vectorOutput: true keyExpressions: col 2 native: false - projectedOutputColumns: [0] + projectedOutputColumns: [] keys: cint (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -280,17 +390,72 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col0:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: int) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) Group By Vectorization: vectorOutput: false native: false projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -323,7 +488,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -351,26 +517,25 @@ STAGE PLANS: projectedOutputColumns: [2] Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2) -> bigint className: VectorGroupByOperator vectorOutput: true keyExpressions: col 2 native: false - projectedOutputColumns: [0] + projectedOutputColumns: [] keys: cint (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -387,17 +552,72 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col0:0._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: KEY._col0 (type: int) + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial2 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) Group By Vectorization: vectorOutput: false native: false projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
