Vineet Garg created HIVE-21021:
----------------------------------

             Summary: Scalar subquery with only aggregate in subquery (no group by) has unnecessary sq_count_check branch
                 Key: HIVE-21021
                 URL: https://issues.apache.org/jira/browse/HIVE-21021
             Project: Hive
          Issue Type: Improvement
    Affects Versions: 3.0.0
            Reporter: Vineet Garg
            Assignee: Vineet Garg


{code:sql}
-- Minimal store_sales table for the repro: ss_sold_date_sk is the join key,
-- ss_quantity and ss_list_price feed the average computed in the query.
CREATE TABLE `store_sales`(
  `ss_sold_date_sk` int,
  `ss_quantity` int,
  `ss_list_price` decimal(7,2));



-- Minimal date_dim table for the repro: d_date_sk is matched against
-- store_sales.ss_sold_date_sk, d_year is the column the query filters on.
CREATE TABLE `date_dim`(
  `d_date_sk` int,
  `d_year` int);

-- Repro query. `avg_sales` is a CTE whose outermost select is a single avg()
-- with no GROUP BY, computed over store_sales joined to date_dim on
-- ss_sold_date_sk = d_date_sk and restricted to d_year between 1999 and 2001.
-- The main query compares ss_list_price against that CTE through a scalar
-- subquery; `explain cbo` prints the optimizer plan for the whole statement.
explain cbo with avg_sales as
 (select avg(quantity*list_price) average_sales
  from (select ss_quantity quantity
             ,ss_list_price list_price
       from store_sales
           ,date_dim
       where ss_sold_date_sk = d_date_sk
         and d_year between 1999 and 2001 ) x)
select * from store_sales where ss_list_price > (select average_sales from 
avg_sales);
{code}

{noformat}
CBO PLAN:
HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
  HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
    HiveJoin(condition=[>($2, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
      HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
        HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
      HiveProject($f0=[/($0, $1)])
        HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[count($0)])
          HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)])
            HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
              HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
                HiveFilter(condition=[IS NOT NULL($0)])
                  HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
              HiveProject(d_date_sk=[$0])
                HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))])
                  HiveTableScan(table=[[sub, date_dim]], table:alias=[date_dim])
    HiveProject(cnt=[$0])
      HiveFilter(condition=[<=(sq_count_check($0), 1)])
        HiveProject(cnt=[$0])
          HiveAggregate(group=[{}], cnt=[COUNT()])
            HiveProject
              HiveProject($f0=[$0])
                HiveAggregate(group=[{}], agg#0=[count($0)])
                  HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[{2.0 rows, 0.0 cpu, 0.0 io}])
                    HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2])
                      HiveFilter(condition=[IS NOT NULL($0)])
                        HiveTableScan(table=[[sub, store_sales]], table:alias=[store_sales])
                    HiveProject(d_date_sk=[$0])
                      HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))])
                        HiveTableScan(table=[[sub, date_dim]], table:alias=[date_dim])
{noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to