http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out new file mode 100644 index 0000000..aa04df8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out @@ -0,0 +1,119 @@ +PREHOOK: query: explain cbo +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(total_sum=[$0], s_state=[$1], s_county=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(total_sum=[$2], s_state=[$0], s_county=[$1], lochierarchy=[+(grouping($3, 1), grouping($3, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($3, 1), grouping($3, 0)), CASE(=(grouping($3, 0), 0), $0, null) ORDER BY $2 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col s_state))=[CASE(=(+(grouping($3, 1), grouping($3, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$7], $f1=[$6], $f2=[$2]) + HiveJoin(condition=[=($7, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(s_store_sk=[$0], s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($24), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_state=[$0]) + HiveFilter(condition=[<=($1, 5)]) + HiveProject((tok_table_or_col s_state)=[$0], rank_window_0=[$1]) + HiveProject((tok_table_or_col s_state)=[$0], rank_window_0=[rank() OVER (PARTITION BY $0 ORDER BY $1 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], window_col_0=[$1]) + HiveProject(s_state=[$0], $f1=[$1]) + HiveAggregate(group=[{6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) +
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out new file mode 100644 index 0000000..4e52893 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@time_dim +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=12 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=12 + and d_year=2001 + ) as tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@time_dim +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(brand_id=[$0], brand=[$1], t_hour=[$2], t_minute=[$3], ext_price=[$4]) + HiveSortLimit(sort0=[$4], sort1=[$5], dir0=[DESC-nulls-last], dir1=[ASC]) + HiveProject(brand_id=[$2], brand=[$3], t_hour=[$0], t_minute=[$1], ext_price=[$4], (tok_table_or_col i_brand_id)=[$2]) + HiveAggregate(group=[{1, 2, 8, 9}], agg#0=[sum($4)]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(t_time_sk=[$0], t_hour=[$3], t_minute=[$4], t_meal_time=[$9]) + HiveFilter(condition=[AND(IN($9, _UTF-16LE'breakfast', _UTF-16LE'dinner'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($1))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15), IS NOT NULL($1))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_sold_time_sk=[$1], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER], d_moy=[CAST(12):INTEGER]) + HiveFilter(condition=[AND(=($8, 12), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8], i_manager_id=[CAST(1):INTEGER]) + HiveFilter(condition=[AND(=($20, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out new file mode 100644 index 0000000..fca31ef --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: explain cbo +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,count(case when p_promo_sk is null then 1 else 0 end) no_promo + ,count(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '1001-5000' + and d1.d_year = 2001 + and hd_buy_potential = '1001-5000' + and cd_marital_status = 'M' + and d1.d_year = 2001 +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,count(case when p_promo_sk is null then 1 else 0 end) no_promo + ,count(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '1001-5000' + and d1.d_year = 2001 + and hd_buy_potential = '1001-5000' + and cd_marital_status = 'M' + and d1.d_year = 2001 +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[count($4)], agg#2=[count()]) + HiveProject($f0=[$15], $f1=[$13], $f2=[$22], $f3=[CASE(IS NULL($28), 1, 0)], $f4=[CASE(IS NOT NULL($28), 1, 0)]) + HiveJoin(condition=[AND(=($29, $4), =($30, $6))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$22], cd_marital_status=[$23], hd_demo_sk=[$24], hd_buy_potential=[$25], d_date_sk=[$18], d_date=[$19], d_week_seq=[$20], d_year=[$21], d_date_sk0=[$27], d_week_seq0=[$28], d_date_sk1=[$6], d_date0=[$7], p_promo_sk=[$26]) + HiveJoin(condition=[AND(=($0, $27), =($20, $28))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($14, $1), <($3, $17))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(d_date_sk=[$0], d_date=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_date0=[$13], d_week_seq=[$14], d_year=[$15], cd_demo_sk=[$16], cd_marital_status=[$17], hd_demo_sk=[$18], hd_buy_potential=[$19], p_promo_sk=[$20]) + HiveJoin(condition=[AND(=($5, $0), >(CAST($1):DOUBLE, +(CAST($13):DOUBLE, 5)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($5, $16)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_bill_cdemo_sk=[$4], cs_bill_hdemo_sk=[$5], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2], d_week_seq=[$4], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[CAST(_UTF-16LE'M'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(hd_demo_sk=[$0], hd_buy_potential=[CAST(_UTF-16LE'1001-5000'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1001-5000'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(p_promo_sk=[$0]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out new file mode 100644 index 0000000..d28a896 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query73.q.out @@ -0,0 +1,93 @@ +PREHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@household_demographics +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Mobile County','Maverick County','Huron County','Kittitas County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@household_demographics +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$5], dir0=[DESC-nulls-last]) + HiveProject(c_last_name=[$3], c_first_name=[$2], c_salutation=[$1], c_preferred_cust_flag=[$4], ss_ticket_number=[$5], cnt=[$7]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2]) + HiveFilter(condition=[BETWEEN(false, $2, 1, 5)]) + HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 4}], agg#0=[count()]) + HiveJoin(condition=[=($3, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_store_sk=[$7], ss_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_dom=[$9]) + HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), BETWEEN(false, $9, 1, 2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(hd_demo_sk=[$0], hd_buy_potential=[$2], hd_dep_count=[$3], hd_vehicle_count=[$4]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'), >($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1), null), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(s_store_sk=[$0], s_county=[$23]) + HiveFilter(condition=[AND(IN($23, _UTF-16LE'Mobile County', _UTF-16LE'Maverick County', _UTF-16LE'Huron County', _UTF-16LE'Kittitas County'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out new file mode 100644 index 0000000..32d6e03 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out @@ -0,0 +1,191 @@ +PREHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,max(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 2,1,3 +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) + HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2]) + HiveJoin(condition=[AND(=($0, $6), CASE(CAST(IS NOT NULL($7)):BOOLEAN, CASE(CAST(IS NOT NULL($9)):BOOLEAN, >(/($5, $9), /($3, $7)), >(null, /($3, $7))), CASE(CAST(IS NOT NULL($9)):BOOLEAN, >(/($5, $9), null), null)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2], year_total=[$4]) + HiveAggregate(group=[{1, 2, 3, 8}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$4]) + HiveAggregate(group=[{1, 2, 3, 8}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$4]) + HiveFilter(condition=[>($4, 0)]) + HiveAggregate(group=[{1, 2, 3, 8}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$4]) + HiveFilter(condition=[>($4, 0)]) + HiveAggregate(group=[{1, 2, 3, 8}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(IN($6, 2001, 2002), =($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out new file mode 100644 index 0000000..3d87d1b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out @@ -0,0 +1,278 @@ +PREHOOK: query: explain cbo +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$5], sales_cnt_diff=[$6], sales_amt_diff=[$7]) + HiveSortLimit(sort0=[$6], dir0=[ASC], fetch=[100]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], prev_yr_cnt=[$4], curr_yr_cnt=[$10], sales_cnt_diff=[-($10, $4)], sales_amt_diff=[-($11, $5)]) + HiveJoin(condition=[AND(AND(AND(AND(=($6, $0), =($7, $1)), =($8, $2)), =($9, $3)), <(/(CAST($10):DECIMAL(17, 2), CAST($4):DECIMAL(17, 2)), 0.9))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) + HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveAggregate(group=[{0, 1, 2, 3, 4, 5}]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_brand_id=[$12], i_class_id=[$13], i_category_id=[$14], i_manufact_id=[$16], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0))]) + HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_year=[CAST(2002):INTEGER]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_category=[CAST(_UTF-16LE'Sports'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($11), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out new file mode 100644 index 0000000..74f888c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out @@ -0,0 +1,101 @@ +PREHOOK: query: explain cbo +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_addr_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_web_page_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_web_page_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_warehouse_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_warehouse_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) + HiveProject(channel=[$0], col_name=[$1], d_year=[$2], d_qoy=[$3], i_category=[$4], $f5=[$5], $f6=[$6]) + HiveAggregate(group=[{0, 1, 2, 3, 4}], agg#0=[count()], agg#1=[sum($5)]) + HiveProject(channel=[$0], col_name=[$1], d_year=[$2], d_qoy=[$3], i_category=[$4], ext_sales_price=[$5]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store'], col_name=[_UTF-16LE'ss_addr_sk'], d_year=[$1], d_qoy=[$2], i_category=[$4], ext_sales_price=[$8]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[null], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NULL($6), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(channel=[_UTF-16LE'web'], col_name=[_UTF-16LE'ws_web_page_sk'], d_year=[$7], d_qoy=[$8], i_category=[$5], ext_sales_price=[$3]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_page_sk=[null], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NULL($12), IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog'], col_name=[_UTF-16LE'cs_warehouse_sk'], d_year=[$7], d_qoy=[$8], i_category=[$5], ext_sales_price=[$3]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[null], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NULL($14), IS NOT NULL($15), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out new file mode 100644 index 0000000..2c42995 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out @@ -0,0 +1,316 @@ +Warning: Shuffle Join MERGEJOIN[317][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +PREHOOK: query: explain cbo +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + group by cs_call_center_sk + ), + cr as + (select + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('1998-08-04' as date) + and (cast('1998-08-04' as date) + 30 days) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_page +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(channel=[$0], id=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) + HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)]) + HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) + HiveUnion(all=[true]) + HiveProject(channel=[_UTF-16LE'store channel'], id=[$0], sales=[$1], returns=[CASE(IS NOT NULL($4), $4, 0)], profit=[-($2, CASE(IS NOT NULL($5), $5, 0))]) + HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{6}], agg#0=[sum($2)], agg#1=[sum($3)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_store_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_store_sk=[$7], sr_return_amt=[$11], sr_net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'catalog channel'], id=[$0], sales=[$1], returns=[$3], profit=[-($2, $4)]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_call_center_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_ext_sales_price=[$23], cs_net_profit=[$33]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1]) + HiveAggregate(group=[{}], agg#0=[sum($1)], agg#1=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_return_amount=[$18], cr_net_loss=[$26]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(channel=[_UTF-16LE'web channel'], id=[$0], sales=[$1], returns=[CASE(IS NOT NULL($4), $4, 0)], profit=[-($2, CASE(IS NOT NULL($5), $5, 0))]) + HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(wp_web_page_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_web_page_sk=[$12], ws_ext_sales_price=[$23], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(wp_web_page_sk=[$0], $f1=[$1], $f2=[$2]) + HiveAggregate(group=[{0}], agg#0=[sum($3)], agg#1=[sum($4)]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_web_page_sk=[$11], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00, 1998-09-03 00:00:00), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) +
