Hi, I am a analyst from banggood.com,China.We have use impala to optimize hive or spark's work.But we found impala don't support aggregate function.There are some example below.While statement like 'set APPX_COUNT_DISTINCT=true; ' is not so sure for us to use.I would like to hear from you.Thank you!
https://segmentfault.com/a/1190000007530586 --复购率 set mapreduce.job.queuename=os; select a.date1, count(distinct a.customers_id) users, count(distinct case when datediff(b.date1,a.date1) between 1 and 7 then b.customers_id end) rebuy_1, count(distinct case when datediff(b.date1,a.date1) between 8 and 15 then b.customers_id end) rebuy_2, count(distinct case when datediff(b.date1,a.date1) between 16 and 30 then b.customers_id end) rebuy_3, count(distinct case when datediff(b.date1,a.date1) between 31 and 60 then b.customers_id end) rebuy_4, count(distinct case when datediff(b.date1,a.date1) between 61 and 90 then b.customers_id end) rebuy_5, count(distinct case when datediff(b.date1,a.date1) between 91 and 120 then b.customers_id end) rebuy_6, count(distinct case when datediff(b.date1,a.date1) between 121 and 180 then b.customers_id end) rebuy_7, count(distinct case when datediff(b.date1,a.date1) between 181 and 360 then b.customers_id end) rebuy_8, count(distinct case when datediff(b.date1,a.date1) > 361 then b.customers_id end) rebuy_9 from (select from_unixtime(add_time,'yyyy-MM-dd') date1,customers_id,row_number() over(partition by customers_id order by from_unixtime(add_time,'yyyy-MM-dd')) rown from datacube.dc_orders where from_unixtime(add_time,'yyyy-MM-dd') >= '2017-01-01' and domain = 1 and order_type not in ('dropship','dropshipping','wholesale','gift','snatch') and customers_id <> 0 and orders_status NOT IN (1 , 4, 6, 12, 17, 20, 21, 22, 23, 27) -- and site in ('pwa.yoins.com','android','ios','www.yoins.com','m.yoins.com') group by from_unixtime(add_time,'yyyy-MM-dd'),customers_id ) a left outer join ( select from_unixtime(add_time,'yyyy-MM-dd') as date1,customers_id,row_number() over(partition by customers_id order by from_unixtime(add_time,'yyyy-MM-dd')) as rown from datacube.dc_orders where from_unixtime(add_time,'yyyy-MM-dd') >= '2017-01-01' and domain = 1 and order_type not in ('dropship','dropshipping','wholesale','gift','snatch') and customers_id <> 0 and orders_status NOT IN (1 , 4, 6, 12, 17, 20, 21, 22, 23, 27) -- and site in ('pwa.yoins.com','android','ios','www.yoins.com','m.yoins.com') group by from_unixtime(add_time,'yyyy-MM-dd'),customers_id ) b on (a.customers_id=b.customers_id and a.rown=b.rown-1) group by a.date1 order by date1 ; chenbix...@banggood.com