http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out new file mode 100644 index 0000000..ff28da1 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out @@ -0,0 +1,92 @@ +Warning: Shuffle Join MERGEJOIN[152][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +PREHOOK: query: explain cbo +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 6 and 6+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 14 and 14+1 + and household_demographics.hd_dep_count = 8 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(am_pm_ratio=[/($0, $1)]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(CAST=[CAST($0):DECIMAL(15, 4)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_time_sk=[$1], ws_ship_hdemo_sk=[$10], ws_web_page_sk=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($1), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $10, 5000, 5200)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 6, 7)]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($3, 8)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(CAST=[CAST($0):DECIMAL(15, 4)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_time_sk=[$1], ws_ship_hdemo_sk=[$10], ws_web_page_sk=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($1), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $10, 5000, 5200)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 14, 15)]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($3, 8)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) +
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out new file mode 100644 index 0000000..046a374 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out @@ -0,0 +1,109 @@ +PREHOOK: query: explain cbo +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1999 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like '0-500%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(call_center=[$0], call_center_name=[$1], manager=[$2], returns_loss=[$3]) + HiveSortLimit(sort0=[$4], dir0=[DESC-nulls-last]) + HiveProject(call_center=[$2], call_center_name=[$3], manager=[$4], returns_loss=[$5], (tok_function sum (tok_table_or_col cr_net_loss))=[$5]) + HiveAggregate(group=[{6, 7, 14, 15, 16}], agg#0=[sum($11)]) + HiveJoin(condition=[=($17, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($9, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -7)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_call_center_sk=[$2], cr_net_loss=[$3], d_date_sk=[$4], cc_call_center_sk=[$5], cc_call_center_id=[$6], cc_name=[$7], cc_manager=[$8]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_call_center_sk=[$11], cr_net_loss=[$26]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_call_center_id=[$1], cc_name=[$6], cc_manager=[$11]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[LIKE($2, _UTF-16LE'0-500%')]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out new file mode 100644 index 0000000..16098d7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out @@ -0,0 +1,94 @@ +PREHOOK: query: explain cbo +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 269 +and i_item_sk = ws_item_sk +and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '1998-03-18' and + (cast('1998-03-18' as date) + 90 days) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(excess discount amount=[$0]) + HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(excess discount amount=[$0], (tok_function sum (tok_table_or_col ws_ext_discount_amt))=[$0]) + HiveAggregate(group=[{}], agg#0=[sum($2)]) + HiveJoin(condition=[AND(>($2, $5), =($6, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_discount_amt=[$22]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_item_sk=[$0], CAST3=[$1], i_item_sk=[$2]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$0], CAST3=[CAST(*(1.3, /($1, $2))):DECIMAL(14, 7)]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_discount_amt=[$22]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out new file mode 100644 index 0000000..4902320 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out @@ -0,0 +1,58 @@ +PREHOOK: query: explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[$1], act_sales=[CASE(IS NOT NULL($8), *(CAST(-($3, $8)):DECIMAL(10, 0), $4), *(CAST($3):DECIMAL(10, 0), $4))]) + HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(r_reason_sk=[$0]) + HiveFilter(condition=[=($2, _UTF-16LE'Did not like the warranty')]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out new file mode 100644 index 0000000..198778f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out @@ -0,0 +1,98 @@ +PREHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveFilter(condition=[IS NULL($14)]) + HiveJoin(condition=[=($4, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($15)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveProject(wr_order_number0=[$13], $f1=[true]) + HiveTableScan(table=[[default, web_returns]], table:alias=[wr1]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out new file mode 100644 index 0000000..0546f1c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $6)], agg#1=[sum($7)], agg#2=[sum($8)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_order_number=[$14]) + HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$1]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$1]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0]) + HiveFilter(condition=[=($14, _UTF-16LE'pri')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out new file mode 100644 index 0000000..e32705a --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: explain cbo +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0]) + HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(_o__c0=[$0], (tok_functionstar count)=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 8), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($3, 5)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out new file mode 100644 index 0000000..6151627 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[sum($1)], agg#2=[sum($2)]) + HiveProject($f0=[CASE(AND(IS NOT NULL($0), IS NULL($2)), 1, 0)], $f1=[CASE(AND(IS NULL($0), IS NOT NULL($2)), 1, 0)], $f2=[CASE(AND(IS NOT NULL($0), IS NOT NULL($2)), 1, 0)]) + HiveJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[full], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_item_sk=[$0]) + HiveAggregate(group=[{1, 2}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_bill_customer_sk=[$0], cs_item_sk=[$1]) + HiveAggregate(group=[{1, 2}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out new file mode 100644 index 0000000..2a6b66f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out @@ -0,0 +1,87 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC]) + HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out new file mode 100644 index 0000000..2dccdf2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out @@ -0,0 +1,102 @@ +PREHOOK: query: explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@ship_mode +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@ship_mode +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0], sm_type=[$1], cc_name=[$2], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(_o__c0=[$2], sm_type=[$0], cc_name=[$1], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7], (tok_function substr (tok_table_or_col w_warehouse_name) 1 20)=[$2]) + HiveAggregate(group=[{11, 13, 15}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)], agg#3=[sum($7)], agg#4=[sum($8)]) + HiveJoin(condition=[=($3, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$2], cs_call_center_sk=[$11], cs_ship_mode_sk=[$13], cs_warehouse_sk=[$14], CASE=[CASE(<=(-($2, $0), 30), 1, 0)], CASE5=[CASE(AND(>(-($2, $0), 30), <=(-($2, $0), 60)), 1, 0)], CASE6=[CASE(AND(>(-($2, $0), 60), <=(-($2, $0), 90)), 1, 0)], CASE7=[CASE(AND(>(-($2, $0), 90), <=(-($2, $0), 120)), 1, 0)], CASE8=[CASE(>(-($2, $0), 120), 1, 0)]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sm_ship_mode_sk=[$0], sm_type=[$2]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(w_warehouse_sk=[$0], substr=[substr($2, 1, 20)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out new file mode 100644 index 0000000..69f9329 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: explain +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 7 vectorized + File Output Operator [FS_159] + Limit [LIM_158] (rows=100 width=100) + Number of rows:100 + Select Operator [SEL_157] (rows=816091 width=100) + Output:["_col0"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_49] + Select Operator [SEL_48] (rows=816091 width=100) + Output:["_col0"] + Filter Operator [FIL_47] (rows=816091 width=324) + predicate:(_col2 > _col6) + Merge Join Operator [MERGEJOIN_133] (rows=2448274 width=324) + Conds:RS_44._col1=RS_156._col1(Inner),Output:["_col2","_col5","_col6"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_131] (rows=2369298 width=213) + Conds:RS_41._col0=RS_151._col0(Inner),Output:["_col1","_col2","_col5"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=80000000 width=104) + Output:["_col0","_col1"] + TableScan [TS_17] (rows=80000000 width=104) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_130] (rows=2369298 width=114) + Conds:RS_146._col1=RS_149._col0(Inner),Output:["_col0","_col1","_col2"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_148] (rows=35 width=4) + Output:["_col0"] + Filter Operator [FIL_147] (rows=35 width=90) + predicate:(s_state = 'NM') + TableScan [TS_14] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_146] + PartitionCols:_col1 + Select Operator [SEL_145] (rows=14291868 width=119) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_144] (rows=14291868 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Group By Operator [GBY_10] (rows=16855704 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_129] (rows=16855704 width=107) + Conds:RS_138._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] + PartitionCols:_col0 + Select Operator [SEL_136] (rows=51757026 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_134] (rows=51757026 width=119) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null) + TableScan [TS_0] (rows=57591150 width=119) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] + PartitionCols:_col0 + Select Operator [SEL_141] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_140] (rows=652 width=8) + predicate:(d_year = 2000) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + PartitionCols:_col1 + Select Operator [SEL_155] (rows=31 width=115) + Output:["_col0","_col1"] + Group By Operator [GBY_154] (rows=31 width=123) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Select Operator [SEL_153] (rows=14291868 width=119) + Output:["_col1","_col2"] + Group By Operator [GBY_152] (rows=14291868 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=17467258 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_132] (rows=17467258 width=107) + Conds:RS_139._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_139] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=53634860 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_135] (rows=53634860 width=119) + predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) + Please refer to the previous TableScan [TS_0] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_141] + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out new file mode 100644 index 0000000..3fbd928 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out @@ -0,0 +1,379 @@ +PREHOOK: query: explain +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 14 <- Reducer 11 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 10 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Map 25 <- Reducer 23 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:100 + Stage-1 + Reducer 8 vectorized + File Output Operator [FS_235] + Limit [LIM_234] (rows=1 width=419) + Number of rows:100 + Select Operator [SEL_233] (rows=1 width=419) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + Select Operator [SEL_231] (rows=1 width=419) + Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"] + Group By Operator [GBY_230] (rows=1 width=379) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Group By Operator [GBY_65] (rows=1 width=379) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Top N Key Operator [TNK_102] (rows=58 width=379) + keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100 + Select Operator [SEL_64] (rows=58 width=379) + Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Filter Operator [FIL_63] (rows=58 width=379) + predicate:(_col15 is not null or _col17 is not null) + Merge Join Operator [MERGEJOIN_180] (rows=58 width=379) + Conds:RS_60._col0=RS_229._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_60] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_179] (rows=58 width=379) + Conds:RS_57._col0=RS_219._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15"] + <-Reducer 4 [ONE_TO_ONE_EDGE] + FORWARD [RS_57] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_178] (rows=7792 width=375) + Conds:RS_54._col0=RS_55._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_174] (rows=3914656 width=375) + Conds:RS_49._col1=RS_188._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_188] + PartitionCols:_col0 + Select Operator [SEL_187] (rows=1861800 width=375) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + TableScan [TS_6] (rows=1861800 width=375) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_173] (rows=3860070 width=5) + Conds:RS_183._col2=RS_186._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_183] + PartitionCols:_col2 + Select Operator [SEL_182] (rows=77201384 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_181] (rows=77201384 width=11) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) + TableScan [TS_0] (rows=80000000 width=11) + default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + PartitionCols:_col0 + Select Operator [SEL_185] (rows=2000000 width=102) + Output:["_col0"] + Filter Operator [FIL_184] (rows=2000000 width=102) + predicate:(ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') + TableScan [TS_3] (rows=40000000 width=102) + default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Group By Operator [GBY_53] (rows=155827 width=2) + Output:["_col0"],keys:_col0 + Select Operator [SEL_17] (rows=57825495 width=2) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_175] (rows=57825495 width=2) + Conds:RS_209._col0=RS_191._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_190] (rows=201 width=4) + Output:["_col0"] + Filter Operator [FIL_189] (rows=201 width=12) + predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7) + TableScan [TS_11] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=525327388 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_207] (rows=525327388 width=7) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_54_c_c_customer_sk_min) AND DynamicValue(RS_54_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_54_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_8] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_206] + Group By Operator [GBY_205] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3647763)"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3647763)"] + Select Operator [SEL_133] (rows=3914656 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_174] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_204] + Group By Operator [GBY_203] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_200] + Group By Operator [GBY_197] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_192] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_190] + <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_219] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=155827 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_217] (rows=155827 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=155827 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_176] (rows=15843227 width=3) + Conds:RS_216._col0=RS_193._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_193] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_190] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=143930993 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_214] (rows=143930993 width=7) + predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_57_c_c_customer_sk_min) AND DynamicValue(RS_57_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_57_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_18] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_213] + Group By Operator [GBY_212] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + FORWARD [RS_150] + Group By Operator [GBY_149] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_148] (rows=7792 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_178] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_201] + Group By Operator [GBY_198] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_194] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_190] + <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_229] + PartitionCols:_col0 + Select Operator [SEL_228] (rows=154725 width=7) + Output:["_col0","_col1"] + Group By Operator [GBY_227] (rows=154725 width=3) + Output:["_col0"],keys:KEY._col0 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Group By Operator [GBY_42] (rows=154725 width=3) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=31162251 width=3) + Conds:RS_226._col0=RS_195._col0(Inner),Output:["_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_195] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_190] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_226] + PartitionCols:_col0 + Select Operator [SEL_225] (rows=285115246 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_224] (rows=285115246 width=7) + predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_60_c_c_customer_sk_min) AND DynamicValue(RS_60_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_60_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_32] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_221] + Group By Operator [GBY_220] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_202] + Group By Operator [GBY_199] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_196] (rows=201 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_190] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_223] + Group By Operator [GBY_222] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_165] + Group By Operator [GBY_164] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_163] (rows=58 width=4) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_179] +
