http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out new file mode 100644 index 0000000..60b7557 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[$1], act_sales=[CASE(IS NOT NULL($8), *(CAST(-($3, $8)):DECIMAL(10, 0), $4), *(CAST($3):DECIMAL(10, 0), $4))]) + HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(r_reason_sk=[$0], r_reason_desc=[CAST(_UTF-16LE'Did not like the warranty'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'Did not like the warranty'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) +
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out new file mode 100644 index 0000000..4f97a67 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out @@ -0,0 +1,100 @@ +PREHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) + HiveFilter(condition=[IS NULL($14)]) + HiveJoin(condition=[=($4, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($8, _UTF-16LE'TX'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13), IS NOT NULL($17))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($14, _UTF-16LE'pri'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($17), IS NOT NULL($15))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveProject(wr_order_number0=[$0], $f1=[true]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[wr1]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out new file mode 100644 index 0000000..c248890 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Input: default@web_site +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Input: default@web_site +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) + HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100]) + HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0]) + HiveAggregate(group=[{}], agg#0=[count(DISTINCT $7)], agg#1=[sum($8)], agg#2=[sum($9)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_order_number=[$0]) + HiveAggregate(group=[{14}]) + HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$1]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveProject(wr_returned_date_sk=[$0], wr_returned_time_sk=[$1], wr_item_sk=[$2], wr_refunded_customer_sk=[$3], wr_refunded_cdemo_sk=[$4], wr_refunded_hdemo_sk=[$5], wr_refunded_addr_sk=[$6], wr_returning_customer_sk=[$7], wr_returning_cdemo_sk=[$8], wr_returning_hdemo_sk=[$9], wr_returning_addr_sk=[$10], wr_web_page_sk=[$11], wr_reason_sk=[$12], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15], wr_return_tax=[$16], wr_return_amt_inc_tax=[$17], wr_fee=[$18], wr_return_ship_cost=[$19], wr_refunded_cash=[$20], wr_reversed_charge=[$21], wr_account_credit=[$22], wr_net_loss=[$23], BLOCK__OFFSET__INSIDE__FILE=[$24], INPUT__FILE__NAME=[$25], ROW__ID=[$26]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$0]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($8, _UTF-16LE'TX'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($17), IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($14, _UTF-16LE'pri'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query96.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query96.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query96.q.out new file mode 100644 index 0000000..6367e98 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query96.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: explain cbo +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0]) + HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) + HiveProject(_o__c0=[$0], (tok_functionstar count)=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(t_time_sk=[$0], t_hour=[CAST(8):INTEGER], t_minute=[$4]) + HiveFilter(condition=[AND(=($3, 8), >=($4, 30), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(hd_demo_sk=[$0], hd_dep_count=[CAST(5):INTEGER]) + HiveFilter(condition=[AND(=($3, 5), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(s_store_sk=[$0], s_store_name=[CAST(_UTF-16LE'ese'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($5, _UTF-16LE'ese'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query97.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query97.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query97.q.out new file mode 100644 index 0000000..5cd8582 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query97.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: explain cbo +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1212 and 1212 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[sum($1)], agg#2=[sum($2)]) + HiveProject($f0=[CASE(AND(IS NOT NULL($0), IS NULL($2)), 1, 0)], $f1=[CASE(AND(IS NULL($0), IS NOT NULL($2)), 1, 0)], $f2=[CASE(AND(IS NOT NULL($0), IS NOT NULL($2)), 1, 0)]) + HiveJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[full], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$1], ss_item_sk=[$0]) + HiveAggregate(group=[{1, 2}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_bill_customer_sk=[$0], cs_item_sk=[$1]) + HiveAggregate(group=[{1, 2}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out new file mode 100644 index 0000000..804885c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out @@ -0,0 +1,87 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC]) + HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query99.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query99.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query99.q.out new file mode 100644 index 0000000..75d0e60 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query99.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@call_center +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@ship_mode +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1212 and 1212 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@call_center +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@ship_mode +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0], sm_type=[$1], cc_name=[$2], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(_o__c0=[$0], sm_type=[$1], cc_name=[$2], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7], (tok_function substr (tok_table_or_col w_warehouse_name) 1 20)=[$0]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[sum($4)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)]) + HiveProject($f0=[substr($10, 1, 20)], $f1=[$12], $f2=[$8], $f3=[CASE(<=(-($1, $0), 30), 1, 0)], $f4=[CASE(AND(>(-($1, $0), 30), <=(-($1, $0), 60)), 1, 0)], $f5=[CASE(AND(>(-($1, $0), 60), <=(-($1, $0), 90)), 1, 0)], $f6=[CASE(AND(>(-($1, $0), 90), <=(-($1, $0), 120)), 1, 0)], $f7=[CASE(>(-($1, $0), 120), 1, 0)]) + HiveJoin(condition=[=($3, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_call_center_sk=[$11], cs_ship_mode_sk=[$13], cs_warehouse_sk=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(sm_ship_mode_sk=[$0], sm_type=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out new file mode 100644 index 0000000..13801ff --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out @@ -0,0 +1,90 @@ +PREHOOK: query: explain cbo +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(c_customer_id=[$1]) + HiveJoin(condition=[AND(=($3, $7), >($4, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($24, _UTF-16LE'NM')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(_o__c0=[*(/($1, $2), 1.2)], ctr_store_sk=[$0]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out new file mode 100644 index 0000000..42e3df0 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query10.q.out @@ -0,0 +1,177 @@ +PREHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$4], sort4=[$6], sort5=[$8], sort6=[$10], sort7=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], dir5=[ASC], dir6=[ASC], dir7=[ASC], fetch=[100]) + HiveProject(cd_gender=[$0], cd_marital_status=[$1], cd_education_status=[$2], cnt1=[$8], cd_purchase_estimate=[$3], cnt2=[$8], cd_credit_rating=[$4], cnt3=[$8], cd_dep_count=[$5], cnt4=[$8], cd_dep_employed_count=[$6], cnt5=[$8], cd_dep_college_count=[$7], cnt6=[$8]) + HiveAggregate(group=[{6, 7, 8, 9, 10, 11, 12, 13}], agg#0=[count()]) + HiveFilter(condition=[OR(IS NOT NULL($15), IS NOT NULL($17))]) + HiveJoin(condition=[=($0, $16)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $14)], joinType=[left], algorithm=[none], cost=[not available]) + HiveSemiJoin(condition=[=($0, $14)], joinType=[inner]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, customer]], table:alias=[c]) + HiveProject(ca_address_sk=[$0], ca_county=[$7]) + HiveFilter(condition=[IN($7, _UTF-16LE'Walker County', _UTF-16LE'Richland County', _UTF-16LE'Gaines County', _UTF-16LE'Douglas County', _UTF-16LE'Dona Ana County')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ca]) + HiveProject(cd_demo_sk=[$0], cd_gender=[$1], cd_marital_status=[$2], cd_education_status=[$3], cd_purchase_estimate=[$4], cd_credit_rating=[$5], cd_dep_count=[$6], cd_dep_employed_count=[$7], cd_dep_college_count=[$8]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_customer_sk0=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_bill_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_customer_sk0=[$0], $f1=[true]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_customer_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), BETWEEN(false, $8, 4, 7))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out new file mode 100644 index 0000000..cd1eb71 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out @@ -0,0 +1,215 @@ +PREHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) + HiveProject(c_preferred_cust_flag=[$1]) + HiveJoin(condition=[AND(=($0, $5), CASE($7, CASE($10, >(/($4, $9), /($2, $6)), >(null, /($2, $6))), CASE($10, >(/($4, $9), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f4=[$3], $f9=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0)]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out new file mode 100644 index 0000000..ab4a837 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) + HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0]) + HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out new file mode 100644 index 0000000..811a518 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out @@ -0,0 +1,137 @@ +PREHOOK: query: explain cbo +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], $f3=[CAST($4):DECIMAL(17, 2)]) + HiveAggregate(group=[{}], agg#0=[sum($19)], agg#1=[count($19)], agg#2=[sum($20)], agg#3=[count($20)], agg#4=[sum($21)], agg#5=[count($21)]) + HiveJoin(condition=[AND(=($0, $16), OR(AND($1, $2, $25, $12), AND($3, $4, $26, $13), AND($5, $6, $27, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[AND(=($11, $0), OR(AND($1, $15), AND($2, $16), AND($3, $17)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) + HiveFilter(condition=[IN($3, 3, 1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100, 200)], BETWEEN9=[BETWEEN(false, $22, 150, 300)], BETWEEN10=[BETWEEN(false, $22, 50, 250)], BETWEEN11=[BETWEEN(false, $13, 100, 150)], BETWEEN12=[BETWEEN(false, $13, 50, 100)], BETWEEN13=[BETWEEN(false, $13, 150, 200)]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), OR(BETWEEN(false, $22, 100, 200), BETWEEN(false, $22, 150, 300), BETWEEN(false, $22, 50, 250)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) +
