http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out new file mode 100644 index 0000000..f4e6a73 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out @@ -0,0 +1,203 @@ +PREHOOK: query: explain +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson County','Barrow County','Pennington County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County', + 'Fairfield County','Jackson County','Barrow County','Pennington County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 4 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_134] + Select Operator [SEL_133] (rows=276068 width=364) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_34] + Select Operator [SEL_33] (rows=276068 width=364) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_99] (rows=276068 width=364) + Conds:RS_101._col0=RS_132._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_100] (rows=80000000 width=356) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_0] (rows=80000000 width=356) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col1 + Filter Operator [FIL_131] (rows=276068 width=12) + predicate:_col2 BETWEEN 15 AND 20 + Select Operator [SEL_130] (rows=5521356 width=12) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_129] (rows=5521356 width=12) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0, _col1 + Group By Operator [GBY_24] (rows=5521356 width=12) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 + Merge Join Operator [MERGEJOIN_98] (rows=5521356 width=4) + Conds:RS_20._col3=RS_120._col0(Inner),Output:["_col1","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] + PartitionCols:_col0 + Select Operator [SEL_119] (rows=112 width=4) + Output:["_col0"] + Filter Operator [FIL_118] (rows=112 width=102) + predicate:(s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') + TableScan [TS_11] (rows=1704 width=102) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_county"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_20] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_97] (rows=10407948 width=4) + Conds:RS_17._col2=RS_112._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + PartitionCols:_col0 + Select Operator [SEL_111] (rows=480 width=4) + Output:["_col0"] + Filter Operator [FIL_110] (rows=480 width=104) + predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END) + TableScan [TS_8] (rows=7200 width=104) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_96] (rows=156119211 width=14) + Conds:RS_128._col0=RS_104._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_104] + PartitionCols:_col0 + Select Operator [SEL_103] (rows=595 width=4) + Output:["_col0"] + Filter Operator [FIL_102] (rows=595 width=12) + predicate:((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002)) + TableScan [TS_5] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Select Operator [SEL_127] (rows=479121995 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_126] (rows=479121995 width=19) + predicate:((ss_hdemo_sk BETWEEN DynamicValue(RS_18_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_18_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_18_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_21_store_s_store_sk_min) AND DynamicValue(RS_21_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_21_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=19) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_store_sk","ss_ticket_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_105] (rows=595 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_103] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_117] + Group By Operator [GBY_116] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_113] (rows=480 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_111] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + Group By Operator [GBY_122] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_121] (rows=112 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_119] +
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out new file mode 100644 index 0000000..2501199 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out @@ -0,0 +1,361 @@ +PREHOOK: query: explain +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + max(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + ca_state, + cd_gender, + cd_marital_status, + count(*) cnt1, + avg(cd_dep_count), + max(cd_dep_count), + sum(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + avg(cd_dep_employed_count), + max(cd_dep_employed_count), + sum(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + avg(cd_dep_college_count), + max(cd_dep_college_count), + sum(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 1999 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Map 23 <- Reducer 10 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) +Map 24 <- Reducer 22 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 15 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 21 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_232] + Limit [LIM_231] (rows=1 width=352) + Number of rows:100 + Select Operator [SEL_230] (rows=1 width=352) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] + Select Operator [SEL_228] (rows=1 width=352) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_227] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","max(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","max(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","max(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator [GBY_64] (rows=1 width=336) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","sum(_col8)","count(_col8)","max(_col8)","sum(_col9)","count(_col9)","max(_col9)","sum(_col10)","count(_col10)","max(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 + Top N Key Operator [TNK_102] (rows=67 width=276) + keys:_col4, _col6, _col7, _col8, _col9, _col10,sort order:++++++,top n:100 + Select Operator [SEL_63] (rows=67 width=276) + Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_62] (rows=67 width=276) + predicate:(_col12 is not null or _col14 is not null) + Merge Join Operator [MERGEJOIN_180] (rows=67 width=276) + Conds:RS_59._col0=RS_226._col0(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col12","_col14"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_59] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_179] (rows=68 width=276) + Conds:RS_56._col0=RS_216._col0(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col12"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD [RS_56] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_178] (rows=162346 width=272) + Conds:RS_53._col0=RS_54._col0(Left Semi),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col0 + Group By Operator [GBY_52] (rows=168231 width=2) + Output:["_col0"],keys:_col0 + Select Operator [SEL_16] (rows=62428523 width=2) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_175] (rows=62428523 width=2) + Conds:RS_206._col0=RS_190._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=217 width=4) + Output:["_col0"] + Filter Operator [FIL_188] (rows=217 width=12) + predicate:((d_qoy < 4) and (d_year = 1999)) + TableScan [TS_10] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_206] + PartitionCols:_col0 + Select Operator [SEL_205] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_204] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_14_date_dim_d_date_sk_min) AND DynamicValue(RS_14_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_14_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_7] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_203] + Group By Operator [GBY_202] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_199] + Group By Operator [GBY_196] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_191] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_189] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_174] (rows=78293105 width=272) + Conds:RS_48._col1=RS_187._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] + PartitionCols:_col0 + Select Operator [SEL_186] (rows=1861800 width=186) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + TableScan [TS_5] (rows=1861800 width=186) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_173] (rows=77201384 width=93) + Conds:RS_183._col2=RS_185._col0(Inner),Output:["_col0","_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_183] + PartitionCols:_col2 + Select Operator [SEL_182] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_181] (rows=77201384 width=11) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_185] + PartitionCols:_col0 + Select Operator [SEL_184] (rows=40000000 width=90) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=40000000 width=90) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=168231 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_214] (rows=168231 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Group By Operator [GBY_27] (rows=168231 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=17104380 width=3) + Conds:RS_213._col0=RS_192._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_192] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_189] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_213] + PartitionCols:_col0 + Select Operator [SEL_212] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_211] (rows=143930993 width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_56_c_c_customer_sk_min) AND DynamicValue(RS_56_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_56_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_24_date_dim_d_date_sk_min) AND DynamicValue(RS_24_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_24_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_17] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_210] + Group By Operator [GBY_209] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_150] + Group By Operator [GBY_149] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_148] (rows=162346 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_178] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_208] + Group By Operator [GBY_207] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_200] + Group By Operator [GBY_197] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_193] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_189] + <-Reducer 21 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_226] + PartitionCols:_col0 + Select Operator [SEL_225] (rows=167041 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_224] (rows=167041 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Group By Operator [GBY_41] (rows=167041 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=33642830 width=3) + Conds:RS_223._col0=RS_194._col0(Inner),Output:["_col1"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_194] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_189] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + PartitionCols:_col0 + Select Operator [SEL_222] (rows=285115246 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_221] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_59_c_c_customer_sk_min) AND DynamicValue(RS_59_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_59_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_38_date_dim_d_date_sk_min) AND DynamicValue(RS_38_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_31] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_218] + Group By Operator [GBY_217] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_201] + Group By Operator [GBY_198] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_195] (rows=217 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_189] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_165] + Group By Operator [GBY_164] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_163] (rows=68 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_179] + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query36.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query36.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query36.q.out new file mode 100644 index 0000000..f2c0b4b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query36.q.out @@ -0,0 +1,182 @@ +PREHOOK: query: explain +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('SD','FL','MI','LA', + 'MO','SC','AL','GA') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_112] + Limit [LIM_111] (rows=100 width=490) + Number of rows:100 + Select Operator [SEL_110] (rows=3060 width=490) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] + Select Operator [SEL_108] (rows=3060 width=490) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + PTF Operator [PTF_107] (rows=3060 width=414) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(_col2 / _col3) ASC NULLS FIRST","partition by:":"(grouping(_col4, 1) + grouping(_col4, 0)), CASE WHEN ((grouping(_col4, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] + Select Operator [SEL_106] (rows=3060 width=414) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] + PartitionCols:(grouping(_col4, 1) + grouping(_col4, 0)), CASE WHEN ((grouping(_col4, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END + Select Operator [SEL_104] (rows=3060 width=414) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_103] (rows=3060 width=414) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_22] (rows=85680 width=414) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1, 0L + Select Operator [SEL_20] (rows=30601888 width=232) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_81] (rows=30601888 width=232) + Conds:RS_17._col1=RS_102._col0(Inner),Output:["_col3","_col4","_col8","_col9"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_101] (rows=462000 width=186) + Output:["_col0","_col1","_col2"] + TableScan [TS_9] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_80] (rows=30601888 width=54) + Conds:RS_14._col2=RS_92._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_92] + PartitionCols:_col0 + Select Operator [SEL_91] (rows=278 width=4) + Output:["_col0"] + Filter Operator [FIL_90] (rows=278 width=90) + predicate:(s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC', 'AL', 'GA') + TableScan [TS_6] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_79] (rows=187574154 width=203) + Conds:RS_100._col0=RS_84._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_84] + PartitionCols:_col0 + Select Operator [SEL_83] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_82] (rows=652 width=8) + predicate:(d_year = 1999) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_100] + PartitionCols:_col0 + Select Operator [SEL_99] (rows=525329897 width=225) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_98] (rows=525329897 width=225) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_12_d1_d_date_sk_min) AND DynamicValue(RS_12_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_12_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_15_store_s_store_sk_min) AND DynamicValue(RS_15_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_15_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=225) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_97] + Group By Operator [GBY_96] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_95] + Group By Operator [GBY_94] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_93] (rows=278 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_91] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_87] + Group By Operator [GBY_86] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_85] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_83] + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out new file mode 100644 index 0000000..fb4cbf4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out @@ -0,0 +1,146 @@ +PREHOOK: query: explain +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 22 and 22 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-02' as date) and (cast('2001-06-02' as date) + 60 days) + and i_manufact_id in (678,964,918,849) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_102] + Limit [LIM_101] (rows=1 width=396) + Number of rows:100 + Select Operator [SEL_100] (rows=1 width=396) + Output:["_col0","_col1","_col2"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_99] + Group By Operator [GBY_98] (rows=1 width=396) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_22] (rows=2 width=396) + Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 + Top N Key Operator [TNK_42] (rows=2871 width=396) + keys:_col2, _col3, _col4,sort order:+++,top n:100 + Merge Join Operator [MERGEJOIN_78] (rows=2871 width=396) + Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_77] (rows=463969 width=4) + Conds:RS_89._col0=RS_92._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_92] + PartitionCols:_col0 + Select Operator [SEL_91] (rows=8116 width=4) + Output:["_col0"] + Filter Operator [FIL_90] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' + TableScan [TS_8] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_89] + PartitionCols:_col0 + Select Operator [SEL_88] (rows=4176000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_87] (rows=4176000 width=11) + predicate:inv_quantity_on_hand BETWEEN 100 AND 500 + TableScan [TS_5] (rows=37584000 width=11) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] + <-Reducer 2 [ONE_TO_ONE_EDGE] + FORWARD [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_76] (rows=1781971 width=400) + Conds:RS_97._col0=RS_81._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_81] + PartitionCols:_col0 + Select Operator [SEL_80] (rows=297 width=400) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_79] (rows=297 width=404) + predicate:((i_manufact_id) IN (678, 964, 918, 849) and i_current_price BETWEEN 22 AND 52) + TableScan [TS_2] (rows=462000 width=403) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_97] + PartitionCols:_col0 + Select Operator [SEL_96] (rows=287989836 width=4) + Output:["_col0"] + Filter Operator [FIL_95] (rows=287989836 width=4) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_16_item_i_item_sk_min) AND DynamicValue(RS_16_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_16_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_19_inventory_inv_item_sk_min) AND DynamicValue(RS_19_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_19_inventory_inv_item_sk_bloom_filter)))) + TableScan [TS_0] (rows=287989836 width=4) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_94] + Group By Operator [GBY_93] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_50] + Group By Operator [GBY_49] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_48] (rows=463969 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_77] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_86] + Group By Operator [GBY_85] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_84] + Group By Operator [GBY_83] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_82] (rows=297 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_80] + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out new file mode 100644 index 0000000..bc22cfb --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out @@ -0,0 +1,261 @@ +PREHOOK: query: explain +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212 + 11 +) hot_cust +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Map 19 <- Reducer 13 (BROADCAST_EDGE) +Map 20 <- Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 17 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_232] + Limit [LIM_231] (rows=1 width=8) + Number of rows:100 + Group By Operator [GBY_230] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_229] + Group By Operator [GBY_228] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_227] (rows=1 width=8) + Filter Operator [FIL_226] (rows=1 width=8) + predicate:(_col3 = 3L) + Select Operator [SEL_225] (rows=165330890 width=8) + Output:["_col3"] + Group By Operator [GBY_224] (rows=165330890 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] vectorized + Reduce Output Operator [RS_242] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_241] (rows=165330890 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_240] (rows=49146883 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_239] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_238] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_39] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_175] (rows=49146883 width=274) + Conds:RS_35._col1=RS_217._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=80000000 width=184) + Output:["_col0","_col1","_col2"] + TableScan [TS_6] (rows=80000000 width=184) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_174] (rows=49146883 width=97) + Conds:RS_237._col0=RS_200._col0(Inner),Output:["_col1","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_200] + PartitionCols:_col0 + Select Operator [SEL_197] (rows=317 width=98) + Output:["_col0","_col1"] + Filter Operator [FIL_196] (rows=317 width=102) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_3] (rows=73049 width=102) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + PartitionCols:_col0 + Select Operator [SEL_236] (rows=285117831 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_235] (rows=285117831 width=7) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_33_date_dim_d_date_sk_min) AND DynamicValue(RS_33_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_33_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_24] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_234] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_208] + Group By Operator [GBY_205] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_201] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_197] + <-Reducer 16 [CONTAINS] vectorized + Reduce Output Operator [RS_252] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_251] (rows=165330890 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_250] (rows=24986582 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_249] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_248] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_64] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_177] (rows=24986582 width=274) + Conds:RS_60._col1=RS_218._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_218] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_215] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=24986582 width=97) + Conds:RS_247._col0=RS_202._col0(Inner),Output:["_col1","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_202] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_197] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_247] + PartitionCols:_col0 + Select Operator [SEL_246] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_245] (rows=143930993 width=7) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_58_date_dim_d_date_sk_min) AND DynamicValue(RS_58_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_58_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_49] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_244] + Group By Operator [GBY_243] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_209] + Group By Operator [GBY_206] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_203] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_197] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_223] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_222] (rows=165330890 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_221] (rows=91197425 width=282) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 + Select Operator [SEL_220] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_219] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_15] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 + Merge Join Operator [MERGEJOIN_173] (rows=91197425 width=274) + Conds:RS_11._col1=RS_216._col0(Inner),Output:["_col3","_col5","_col6"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_215] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_172] (rows=91197425 width=96) + Conds:RS_214._col0=RS_198._col0(Inner),Output:["_col1","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_198] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_197] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_213] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_212] (rows=525327388 width=7) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_207] + Group By Operator [GBY_204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_199] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_197] +
