http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out new file mode 100644 index 0000000..ee94ea3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@web_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(item_id=[$0], sr_item_qty=[$4], sr_dev=[*(/(/($5, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], cr_item_qty=[$1], cr_dev=[*(/(/($2, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], wr_item_qty=[$7], wr_dev=[*(/(/($8, CAST(+(+($4, $1), $7)):DOUBLE), CAST(3):DOUBLE), CAST(100):DOUBLE)], average=[/(CAST(+(+($4, $1), $7)):DECIMAL(19, 0), 3)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(item_id=[$0], cr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_item_sk=[$2], cr_return_quantity=[$17]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(item_id=[$0], sr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_return_quantity=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(item_id=[$0], wr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveAggregate(group=[{4}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_item_sk=[$2], wr_return_quantity=[$14]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $6)], joinType=[inner]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date_id=[$1], d_date=[$2], d_month_seq=[$3], d_week_seq=[$4], d_quarter_seq=[$5], d_year=[$6], d_dow=[$7], d_moy=[$8], d_dom=[$9], d_qoy=[$10], d_fy_year=[$11], d_fy_quarter_seq=[$12], d_fy_week_seq=[$13], d_day_name=[$14], d_quarter_name=[$15], d_holiday=[$16], d_weekend=[$17], d_following_holiday=[$18], d_first_dom=[$19], d_last_dom=[$20], d_same_day_ly=[$21], d_same_day_lq=[$22], d_current_day=[$23], d_current_week=[$24], d_current_month=[$25], d_current_quarter=[$26], d_current_year=[$27], BLOCK__OFFSET__INSIDE__FILE=[$28], INPUT__FILE__NAME=[$29], ROW__ID=[$30]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) +
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out new file mode 100644 index 0000000..43ea953 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out @@ -0,0 +1,83 @@ +PREHOOK: query: explain cbo +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@income_band +PREHOOK: Input: default@store_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_customer_id as customer_id + ,c_last_name || ', ' || c_first_name as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Hopewell' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 32287 + and ib_upper_bound <= 32287 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@income_band +POSTHOOK: Input: default@store_returns +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(customer_id=[$0], customername=[$1]) + HiveSortLimit(sort0=[$2], dir0=[ASC], fetch=[100]) + HiveProject(customer_id=[$2], customername=[$6], c_customer_id=[$2]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_cdemo_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(cd_demo_sk=[$0]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(c_customer_id=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], ||=[$4], ca_address_sk=[$5], hd_demo_sk=[$6], hd_income_band_sk=[$7], ib_income_band_sk=[$8]) + HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_id=[$1], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], ||=[||(||($9, _UTF-16LE', '), $8)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($6, _UTF-16LE'Hopewell')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[AND(>=($1, 32287), <=($2, 82287))]) + HiveTableScan(table=[[default, income_band]], table:alias=[income_band]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out new file mode 100644 index 0000000..1876936 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out @@ -0,0 +1,214 @@ +PREHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@reason +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) + HiveSortLimit(sort0=[$7], sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], _o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 20)=[substr($0, 1, 20)]) + HiveAggregate(group=[{14}], agg#0=[sum($30)], agg#1=[count($30)], agg#2=[sum($26)], agg#3=[count($26)], agg#4=[sum($25)], agg#5=[count($25)]) + HiveJoin(condition=[AND(AND(AND(=($1, $17), =($2, $18)), =($0, $20)), OR(AND($3, $4, $34), AND($5, $6, $35), AND($7, $8, $36)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[AND(=($0, $12), OR(AND($1, $22), AND($2, $23), AND($3, $24)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveJoin(condition=[AND(=($9, $0), =($10, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], BETWEEN=[BETWEEN(false, $33, 100, 200)], BETWEEN6=[BETWEEN(false, $33, 150, 300)], BETWEEN7=[BETWEEN(false, $33, 50, 250)], BETWEEN8=[BETWEEN(false, $21, 100, 150)], BETWEEN9=[BETWEEN(false, $21, 50, 100)], BETWEEN10=[BETWEEN(false, $21, 150, 200)]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $21, 100, 150), BETWEEN(false, $21, 50, 100), BETWEEN(false, $21, 150, 200)), OR(BETWEEN(false, $33, 100, 200), BETWEEN(false, $33, 150, 300), BETWEEN(false, $33, 50, 250)), IS NOT NULL($12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query86.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query86.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query86.q.out new file mode 100644 index 0000000..8e89983 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query86.q.out @@ -0,0 +1,76 @@ +PREHOOK: query: explain cbo +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(total_sum=[$0], i_category=[$1], i_class=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(total_sum=[$2], i_category=[$0], i_class=[$1], lochierarchy=[+(grouping($3, 1), grouping($3, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($3, 1), grouping($3, 0)), CASE(=(grouping($3, 0), 0), $0, null) ORDER BY $2 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col i_category))=[CASE(=(+(grouping($3, 1), grouping($3, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$2], $f1=[$1], $f2=[$5]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_class=[$10], i_category=[$12]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_net_paid=[$29]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out new file mode 100644 index 0000000..97c9edd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out @@ -0,0 +1,111 @@ +PREHOOK: query: explain cbo +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1212 and 1212+11) +) cool_cust +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[AND(>($3, 0), =(*($3, 2), $4))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f4=[$4], $f5=[*($3, $4)]) + HiveUnion(all=[true]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[2], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveFilter(condition=[AND(>($3, 0), =(*($3, 2), $4))]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f4=[$4], $f5=[*($3, $4)]) + HiveUnion(all=[true]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[2], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1], $f4=[$3]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) + HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) + HiveAggregate(group=[{1, 2, 6}]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out new file mode 100644 index 0000000..1ac8fe2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query88.q.out @@ -0,0 +1,347 @@ +Warning: Shuffle Join MERGEJOIN[599][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[600][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[601][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[602][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[603][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[604][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[605][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain cbo +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +PREHOOK: type: QUERY +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s8 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject($f0=[$0], $f00=[$7], $f01=[$6], $f02=[$5], $f03=[$4], $f04=[$3], $f05=[$2], $f06=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 8), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 12), <($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 11), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 11), <($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 10), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 10), <($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 9), >=($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(IN($3, 3, 0, 1), <=($4, 5), OR(AND(=($3, 3), IS NOT NULL($4)), AND(=($3, 0), <=($4, 2)), AND(=($3, 1), <=($4, 3))))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(=($3, 9), <($4, 30))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($5, _UTF-16LE'ese')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out new file mode 100644 index 0000000..d9c7d42 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query89.q.out @@ -0,0 +1,87 @@ +PREHOOK: query: explain cbo +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and + ((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7]) + HiveSortLimit(sort0=[$8], sort1=[$3], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(i_category=[$0], i_class=[$1], i_brand=[$2], s_store_name=[$3], s_company_name=[$4], d_moy=[$5], sum_sales=[$6], avg_monthly_sales=[$7], (- (tok_table_or_col sum_sales) (tok_table_or_col avg_monthly_sales))=[-($6, $7)]) + HiveFilter(condition=[CASE(<>($7, 0), >(/(ABS(-($6, $7)), $7), 0.1), null)]) + HiveProject((tok_table_or_col i_category)=[$2], (tok_table_or_col i_class)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $2, $0, $4, $5 ORDER BY $2 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_class=[$1], i_category=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{5, 6, 7, 9, 11, 12}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical', _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'), IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics', _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Home', _UTF-16LE'Books', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'wallpaper', _UTF-16LE'parenting', _UTF-16LE'musical')), AND(IN($12, _UTF-16LE'Shoes', _UTF-16LE'Jewelry', _UTF-16LE'Men'), IN($10, _UTF-16LE'womens', _UTF-16LE'birdal', _UTF-16LE'pants'))))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_moy=[$8]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out new file mode 100644 index 0000000..4a92def --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query9.q.out @@ -0,0 +1,200 @@ +Warning: Shuffle Join MERGEJOIN[171][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[172][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[173][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[174][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[175][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[176][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[177][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[178][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[179][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[180][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[181][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11]] in Stage 'Reducer 12' is a cross product +Warning: Shuffle Join MERGEJOIN[182][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[183][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[184][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[185][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14, $hdt$_15]] in Stage 'Reducer 16' is a cross product +PREHOOK: query: explain cbo +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 409437 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 4595804 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 7887297 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10872978 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 43571537 + then (select avg(ss_ext_list_price) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid_inc_tax) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(bucket1=[CASE($1, $2, $3)], bucket2=[CASE($4, $5, $6)], bucket3=[CASE($7, $8, $9)], bucket4=[CASE($10, $11, $12)], bucket5=[CASE($13, $14, $15)]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(r_reason_sk=[1]) + HiveFilter(condition=[=($0, 1)]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveProject(>=[>($0, 409437)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 1, 20)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(>=[>($0, 4595804)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 21, 40)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(>=[>($0, 7887297)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 41, 60)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(>=[>($0, 10872978)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 61, 80)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(>=[>($0, 43571537)]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($17)], agg#1=[count($17)]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[/($0, $1)]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)]) + HiveFilter(condition=[BETWEEN(false, $10, 81, 100)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) +
