yujunjun created CASSANDRA-5140:
-----------------------------------

             Summary: multi group by distinct error 
                 Key: CASSANDRA-5140
                 URL: https://issues.apache.org/jira/browse/CASSANDRA-5140
             Project: Cassandra
          Issue Type: Bug
            Reporter: yujunjun


I hive a hql use "set hive.optimize.multigroupby.common.distincts=true" get a 
different result with "set hive.optimize.multigroupby.common.distincts=false",
And the hql is :
set hive.optimize.multigroupby.common.distincts=true;
FROM
(
SELECT
      d.datekey datekey,
      d.`date` dt,
      d.week_num_overall week_num_overall,
      d.yearmo yearmo,
      uc.cityid cityid,
      p.userid userid,
      'all' clienttype,
      du.regdate regdate,
      if (f.orderid = p.orderid, 1, 0) isuserfirstpurchase,
      p.amount revenue
FROM
    fact.orderpayment p
    join dim.user_city uc on uc.userid = p.userid
    join dim.user du on du.userid = p.userid
    join detail.user_firstpurchase f on p.userid=f.userid
    join dim.`date` d on p.datekey = d.datekey

) base
INSERT overwrite TABLE `customer_kpi_periodic` partition (aggrtype = 'day')
    SELECT 
           'day' periodtype,
           base.datekey periodkey,
           'all' clienttype,
           0 cityid,
           count(distinct base.userid) buyer_count,           
sum(base.isuserfirstpurchase) first_buyer_count,
           count(distinct if(base.regdate = base.dt, base.userid, NULL)) 
regdate_buyer_count,
           count(*) order_count,
           sum(if(base.regdate = base.dt, 1, 0)) regdate_order_count,           
sum(base.revenue) revenue,
           sum(if(base.isuserfirstpurchase = 1, base.revenue, 0)) 
first_buyer_revenue,
           sum(if(base.regdate = base.dt, base.revenue, 0)) 
regdate_buyer_revenue

    GROUP BY base.datekey
INSERT overwrite TABLE `customer_kpi_periodic` partition (aggrtype = 'month')
    SELECT 
           'month' periodtype,           base.yearmo periodkey,
           'all' clienttype,           0 cityid,
           count(distinct base.userid) buyer_count,
           sum(base.isuserfirstpurchase) first_buyer_count,           
count(distinct if(base.regdate = base.dt, base.userid, NULL)) 
regdate_buyer_count,
           count(*) order_count,           sum(if(base.regdate = base.dt, 1, 
0)) regdate_order_count,
           sum(base.revenue) revenue,           sum(if(base.isuserfirstpurchase 
= 1, base.revenue, 0)) first_buyer_revenue,
           sum(if(base.regdate = base.dt, base.revenue, 0)) 
regdate_buyer_revenue
    GROUP BY base.yearmo

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to