http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out new file mode 100644 index 0000000..b41b4e3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 7 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 5 vectorized + File Output Operator [FS_97] + Limit [LIM_96] (rows=100 width=201) + Number of rows:100 + Select Operator [SEL_95] (rows=2555 width=201) + Output:["_col0","_col1"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=2555 width=201) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Group By Operator [GBY_23] (rows=43435 width=201) + Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col3 + Top N Key Operator [TNK_43] (rows=20154874 width=205) + keys:_col3,sort order:+,top n:100 + Select Operator [SEL_22] (rows=20154874 width=205) + Output:["_col3","_col8"] + Filter Operator [FIL_21] (rows=20154874 width=205) + predicate:(_col4 or _col5 or _col9) + Merge Join Operator [MERGEJOIN_76] (rows=20154874 width=205) + Conds:RS_18._col0=RS_19._col1(Inner),Output:["_col3","_col4","_col5","_col8","_col9"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_74] (rows=80000000 width=101) + Conds:RS_79._col1=RS_81._col0(Inner),Output:["_col0","_col3","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_79] + PartitionCols:_col1 + Select Operator [SEL_78] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_77] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 6 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] + PartitionCols:_col0 + Select Operator [SEL_80] (rows=40000000 width=101) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_3] (rows=40000000 width=179) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_75] (rows=20154874 width=111) + Conds:RS_92._col0=RS_84._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_84] + PartitionCols:_col0 + Select Operator [SEL_83] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_82] (rows=130 width=12) + predicate:((d_qoy = 2) and (d_year = 2000)) + TableScan [TS_8] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_92] + PartitionCols:_col0 + Select Operator [SEL_91] (rows=285117831 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_90] (rows=285117831 width=119) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_5] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_87] + Group By Operator [GBY_86] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_85] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_83] +
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out new file mode 100644 index 0000000..3143be8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out @@ -0,0 +1,244 @@ +PREHOOK: query: explain +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and 
cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Map 17 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 18 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 9 vectorized + File Output Operator [FS_169] + Limit [LIM_168] (rows=1 width=240) + Number of rows:100 + Select Operator [SEL_167] (rows=1 width=240) + Output:["_col0","_col1","_col2"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] + Select Operator [SEL_165] (rows=1 width=240) + Output:["_col1","_col2","_col3"] + Group By 
Operator [GBY_164] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_163] + Group By Operator [GBY_162] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] + Group By Operator [GBY_161] (rows=5150256 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] (rows=5150256 width=228) + Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 + Select Operator [SEL_37] (rows=5150256 width=218) + Output:["_col4","_col5","_col6"] + Filter Operator [FIL_36] (rows=5150256 width=218) + predicate:_col14 is null + Merge Join Operator [MERGEJOIN_125] (rows=13282454 width=218) + Conds:RS_33._col4=RS_160._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_160] + PartitionCols:_col0 + Select Operator [SEL_159] (rows=28798881 width=8) + Output:["_col0","_col1"] + TableScan [TS_25] (rows=28798881 width=4) + default@catalog_returns,cr1,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_order_number"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_33] + PartitionCols:_col4 + Select Operator [SEL_32] (rows=5150256 width=200) + Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_124] (rows=5150256 width=202) + Conds:RS_29._col4=RS_158._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_123] (rows=5150256 width=200) + Conds:RS_18._col2=RS_144._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + PartitionCols:_col0 + Select 
Operator [SEL_143] (rows=10 width=102) + Output:["_col0"] + Filter Operator [FIL_142] (rows=10 width=102) + predicate:(cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') + TableScan [TS_9] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_county"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_122] (rows=30901534 width=230) + Conds:RS_15._col1=RS_136._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_136] + PartitionCols:_col0 + Select Operator [SEL_135] (rows=784314 width=90) + Output:["_col0"] + Filter Operator [FIL_134] (rows=784314 width=90) + predicate:(ca_state = 'NY') + TableScan [TS_6] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_121] (rows=31519516 width=234) + Conds:RS_152._col0=RS_128._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + PartitionCols:_col0 + Select Operator [SEL_127] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_126] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' + TableScan [TS_3] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=283695062 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_150] (rows=283695062 width=243) + predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND 
DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=243) + default@catalog_sales,cs1,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_129] (rows=8116 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_127] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_141] + Group By Operator [GBY_140] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + Group By Operator [GBY_138] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_137] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_135] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_149] + Group By Operator [GBY_148] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + Group By Operator [GBY_146] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_145] (rows=10 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_143] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] + PartitionCols:_col0 + Group By Operator [GBY_157] (rows=286548719 width=7) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_156] (rows=286548719 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=286548719 width=7) + predicate:((cs_order_number BETWEEN DynamicValue(RS_29_cs1_cs_order_number_min) AND DynamicValue(RS_29_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_29_cs1_cs_order_number_bloom_filter))) and cs_warehouse_sk is not null) + TableScan [TS_22] (rows=287989836 width=7) + default@catalog_sales,cs2,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_warehouse_sk","cs_order_number"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_109] (rows=5150256 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_123] + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out new file mode 100644 index 0000000..e796101 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out @@ -0,0 +1,319 @@ +PREHOOK: query: explain +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and 
d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and 
d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 19 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_259] + Limit [LIM_258] (rows=100 width=466) + Number of rows:100 + Select Operator [SEL_257] (rows=4815969644 width=466) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_256] + Select Operator [SEL_255] (rows=4815969644 width=466) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Group By Operator [GBY_254] (rows=4815969644 width=466) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_47] (rows=4815969644 width=466) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 + Top N Key Operator [TNK_91] (rows=4815969644 width=381) + keys:_col0, _col1, _col2,sort order:+++,top n:100 + Select Operator [SEL_45] (rows=4815969644 width=381) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_211] (rows=4815969644 width=381) + Conds:RS_42._col3=RS_253._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_253] + PartitionCols:_col0 + Select Operator [SEL_252] (rows=1704 width=90) + Output:["_col0","_col1"] + TableScan [TS_31] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_210] (rows=4815969644 width=299) + Conds:RS_39._col1, _col2, _col4=RS_40._col6, _col7, 
_col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col6, _col7, _col8 + Merge Join Operator [MERGEJOIN_209] (rows=540026342 width=19) + Conds:RS_27._col2, _col1=RS_28._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] + <-Reducer 10 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_27] + PartitionCols:_col2, _col1 + Merge Join Operator [MERGEJOIN_207] (rows=14254135 width=11) + Conds:RS_242._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_220] + PartitionCols:_col0 + Select Operator [SEL_216] (rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_213] (rows=3652 width=94) + predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') + TableScan [TS_3] (rows=73049 width=94) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] + PartitionCols:_col0 + Select Operator [SEL_241] (rows=285117831 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_240] (rows=285117831 width=15) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_28_store_returns_sr_customer_sk_min) AND DynamicValue(RS_28_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_28_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_28_store_returns_sr_item_sk_min) AND DynamicValue(RS_28_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_28_store_returns_sr_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_25_d3_d_date_sk_min) AND DynamicValue(RS_25_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_25_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_8] (rows=287989836 width=15) + 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_234] + Group By Operator [GBY_232] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_107] (rows=2681277 width=8) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_208] (rows=2681277 width=10) + Conds:RS_231._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_222] + PartitionCols:_col0 + Select Operator [SEL_217] (rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_214] (rows=3652 width=94) + predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') + Please refer to the previous TableScan [TS_3] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0 + Select Operator [SEL_230] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_229] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_14] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Group By Operator [GBY_237] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE 
[RS_124] + Group By Operator [GBY_123] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_122] (rows=2681277 width=2) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_208] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_226] + Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_221] (rows=3652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_216] + <-Reducer 15 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_28] + PartitionCols:_col1, _col2 + Please refer to the previous Merge Join Operator [MERGEJOIN_208] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col1, _col2, _col4 + Merge Join Operator [MERGEJOIN_206] (rows=27749405 width=294) + Conds:RS_36._col1=RS_251._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] + PartitionCols:_col0 + Select Operator [SEL_250] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + TableScan [TS_6] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_205] (rows=27749405 width=10) + Conds:RS_249._col0=RS_218._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_218] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=101 
width=4) + Output:["_col0"] + Filter Operator [FIL_212] (rows=101 width=94) + predicate:(d_quarter_name = '2000Q1') + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_249] + PartitionCols:_col0 + Select Operator [SEL_248] (rows=501694138 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_247] (rows=501694138 width=23) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_27_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_27_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_27_catalog_sales_cs_bill_customer_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_28_store_returns_sr_customer_sk_min) AND DynamicValue(RS_28_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_28_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_27_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_27_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_27_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_store_returns_sr_item_sk_min) AND DynamicValue(RS_28_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_34_d1_d_date_sk_min) AND DynamicValue(RS_34_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_34_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_233] + Please refer to the previous Group By Operator [GBY_232] + <-Reducer 17 [BROADCAST_EDGE] 
vectorized + BROADCAST [RS_238] + Please refer to the previous Group By Operator [GBY_237] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_244] + Group By Operator [GBY_243] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_112] (rows=14254135 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_207] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_246] + Group By Operator [GBY_245] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_129] + Group By Operator [GBY_128] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_127] (rows=14254135 width=7) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_207] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_228] + Group By Operator [GBY_227] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_225] + Group By Operator [GBY_223] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_219] (rows=101 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_215] + 
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out new file mode 100644 index 0000000..ff4c05f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out @@ -0,0 +1,239 @@ +PREHOOK: query: explain +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( 
cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 10 <- Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 18 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_179] + Limit [LIM_178] (rows=100 width=1165) + Number of rows:100 + Select Operator [SEL_177] (rows=10969055 width=1165) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_176] + Select Operator [SEL_175] (rows=10969055 width=1165) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Group By Operator [GBY_174] (rows=10969055 width=1229) + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1, _col2, _col3, _col4 + Group By Operator [GBY_39] (rows=10969055 width=1229) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col15)","count(_col15)","sum(_col16)","count(_col16)","sum(_col17)","count(_col17)","sum(_col18)","count(_col18)","sum(_col19)","count(_col19)","sum(_col3)","count(_col3)","sum(_col22)","count(_col22)"],keys:_col5, _col6, _col7, _col10, 0L + Merge Join Operator [MERGEJOIN_142] (rows=2193811 width=811) + Conds:RS_35._col0=RS_36._col3(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col15","_col16","_col17","_col18","_col19","_col22"] + <-Reducer 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_35] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_138] (rows=4959744 width=368) + Conds:RS_32._col1=RS_150._col0(Inner),Output:["_col0","_col3","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_150] + PartitionCols:_col0 + Select Operator [SEL_149] (rows=1861800 width=4) + Output:["_col0"] + TableScan [TS_6] (rows=1861800 width=4) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_137] (rows=4890586 width=371) + Conds:RS_145._col2=RS_148._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] + PartitionCols:_col2 + Select Operator [SEL_144] (rows=35631408 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_143] (rows=35631408 width=19) + predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null) + TableScan [TS_0] (rows=80000000 width=19) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col0 + 
Select Operator [SEL_147] (rows=5490196 width=285) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_146] (rows=5490196 width=285) + predicate:(ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') + TableScan [TS_3] (rows=40000000 width=285) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state","ca_country"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col3 + Select Operator [SEL_28] (rows=15983481 width=735) + Output:["_col1","_col3","_col6","_col7","_col8","_col9","_col10","_col13"] + Merge Join Operator [MERGEJOIN_141] (rows=15983481 width=735) + Conds:RS_25._col3=RS_173._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_173] + PartitionCols:_col0 + Select Operator [SEL_172] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_17] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_140] (rows=15983481 width=639) + Conds:RS_22._col2=RS_161._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=103433 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_159] (rows=103433 width=187) + predicate:((cd_education_status = 'College') and (cd_gender = 'M')) + TableScan [TS_14] (rows=1861800 width=187) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_139] (rows=100578970 width=565) + 
Conds:RS_171._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_153] + PartitionCols:_col0 + Select Operator [SEL_152] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_151] (rows=652 width=8) + predicate:(d_year = 2001) + TableScan [TS_11] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_171] + PartitionCols:_col0 + Select Operator [SEL_170] (rows=283692098 width=573) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_169] (rows=283692098 width=466) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_23_cd1_cd_demo_sk_min) AND DynamicValue(RS_23_cd1_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_23_cd1_cd_demo_sk_bloom_filter))) and (cs_bill_customer_sk BETWEEN DynamicValue(RS_35_customer_c_customer_sk_min) AND DynamicValue(RS_35_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_35_customer_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_8] (rows=287989836 width=466) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_158] + Group By Operator [GBY_157] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_156] + Group By Operator [GBY_155] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_154] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_152] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_166] + Group By Operator [GBY_165] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_164] + Group By Operator [GBY_163] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_162] (rows=103433 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_160] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_168] + Group By Operator [GBY_167] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=4890586)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_123] + Group By Operator [GBY_122] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=4890586)"] + Select Operator [SEL_121] (rows=4959744 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_138] + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out new file mode 100644 index 0000000..7eb52ef --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out @@ -0,0 +1,196 @@ +PREHOOK: query: explain +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales 
+POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 8 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 vectorized + File Output Operator [FS_153] + Limit [LIM_152] (rows=100 width=419) + Number of rows:100 + Select Operator [SEL_151] (rows=2098703 width=418) + Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_150] + Select Operator [SEL_149] (rows=2098703 width=418) + Output:["_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_148] (rows=2098703 width=314) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0, _col1, _col2, _col3 + Group By Operator [GBY_34] (rows=2098703 width=314) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)"],keys:_col12, _col11, _col13, _col14 + Select Operator [SEL_33] (rows=2098703 width=570) + Output:["_col8","_col11","_col12","_col13","_col14"] + Filter Operator [FIL_32] (rows=2098703 width=570) + predicate:(_col3 <> _col16) + Merge Join Operator [MERGEJOIN_121] (rows=2098703 width=570) + Conds:RS_29._col7=RS_147._col0(Inner),Output:["_col3","_col8","_col11","_col12","_col13","_col14","_col16"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + PartitionCols:_col0 + Select Operator [SEL_146] (rows=1704 width=188) + 
Output:["_col0","_col1"] + TableScan [TS_21] (rows=1704 width=93) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_zip"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_120] (rows=2098703 width=386) + Conds:RS_26._col0=RS_27._col2(Inner),Output:["_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_119] (rows=2098703 width=202) + Conds:RS_17._col1=RS_137._col0(Inner),Output:["_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_137] + PartitionCols:_col0 + Select Operator [SEL_136] (rows=7333 width=206) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_135] (rows=7333 width=210) + predicate:(i_manager_id = 7) + TableScan [TS_11] (rows=462000 width=210) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manufact_id","i_manufact","i_manager_id"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_118] (rows=13737330 width=4) + Conds:RS_145._col0=RS_129._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_127] (rows=50 width=12) + predicate:((d_moy = 11) and (d_year = 1999)) + TableScan [TS_8] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] + PartitionCols:_col0 + Select Operator [SEL_144] (rows=501694138 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_143] (rows=501694138 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_18_item_i_item_sk_min) AND 
DynamicValue(RS_18_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_18_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_5] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ext_sales_price"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_130] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_128] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_142] + Group By Operator [GBY_141] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_140] + Group By Operator [GBY_139] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_138] (rows=7333 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_136] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_117] (rows=80000000 width=188) + 
Conds:RS_124._col1=RS_126._col0(Inner),Output:["_col0","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] + PartitionCols:_col1 + Select Operator [SEL_123] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_122] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + PartitionCols:_col0 + Select Operator [SEL_125] (rows=40000000 width=188) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=40000000 width=93) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] +
