[ 
https://issues.apache.org/jira/browse/KYLIN-4582?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

YaoChen updated KYLIN-4582:
---------------------------
    Description: 
Query result is correct when grouping have two sets as below:
{code:sql}
group by grouping sets(
(a.period_id,a.company_name,a.province,a.new_flag),
(a.period_id,a.company_name,a.new_flag))
{code}
full sql:
{code:sql}
select period_id,
       case grouping(a.company_name) when 1 then 'ALL' else a.company_name end 
company_name,
       case grouping(a.province) when 1 then 'ALL' else a.province end province,
       case grouping(a.new_flag) when 1 then 'ALL' else a.new_flag end new_flag,
       count(distinct user_id) as index2  --'users'
from (
select monthid as period_id,
       case company_id when '1' then 'xx1' when '2' then 'xx2'
       end as company_name,
       province_name as province,
       case new_user_flag when '1' then '新增' when '2' then '存量' when '3' then 
'存量' end as new_flag,
       user_id
from rpt_culture_order_m
where monthid between '202005' and '{202005}' and order_flag=2 and company_id=4
) a 
group by grouping sets(
(a.period_id,a.company_name,a.province,a.new_flag),
(a.period_id,a.company_name,a.new_flag))
{code}
!right_result.png|width=843,height=547!

*{color:#ff0000}But if we add one or more grouping the query result (count 
distinct(user_id)) is wrong.{color}*
{code:sql}
group by grouping sets(
(a.period_id,a.company_name,a.province,a.new_flag),
(a.period_id,a.company_name,a.new_flag),
(a.period_id,a.company_name))
{code}
full sql:
{code:sql}
select period_id,
       case grouping(a.company_name) when 1 then 'ALL' else a.company_name end 
company_name,
       case grouping(a.province) when 1 then 'ALL' else a.province end province,
       case grouping(a.new_flag) when 1 then 'ALL' else a.new_flag end new_flag,
       count(distinct user_id) as index2  --'users'
from (
select monthid as period_id,
       case company_id when '1' then 'xx1' when '2' then 'xx2'
       end as company_name,
       province_name as province,
       case new_user_flag when '1' then '新增' when '2' then '存量' when '3' then 
'存量' end as new_flag,
       user_id
from rpt_culture_order_m
where monthid between '202005' and '{202005}' and order_flag=2 and company_id=4
) a 
group by grouping sets(
(a.period_id,a.company_name,a.province,a.new_flag),
(a.period_id,a.company_name,a.new_flag),
(a.period_id,a.company_name)
)
{code}
 
 !count_distinct_grouping_sets_wrong_result.png!

The result of '新增‘ (column :INDEX2) should be 3170752 instead of 34808840 which 
is equal with the result of 'all'.
 Further, we have test that when change the aggregation from count distinct to 
count or sum the result is right.

  was:
Query result is correct when grouping have two sets as below:
{code:sql}
group by grouping sets(
(a.period_id,a.company_name,a.province,a.new_flag),
(a.period_id,a.company_name,a.new_flag))
{code}
full sql:
{code:sql}
select period_id,
       case grouping(a.company_name) when 1 then 'ALL' else a.company_name end 
company_name,
       case grouping(a.province) when 1 then 'ALL' else a.province end province,
       case grouping(a.new_flag) when 1 then 'ALL' else a.new_flag end new_flag,
       count(distinct user_id) as index2  --'users'
from (
select monthid as period_id,
       case company_id when '1' then 'xx1' when '2' then 'xx2'
       end as company_name,
       province_name as province,
       case new_user_flag when '1' then '新增' when '2' then '存量' when '3' then 
'存量' end as new_flag,
       user_id
from rpt_culture_order_m
where monthid between '202005' and '{202005}' and order_flag=2 and company_id=4
) a 
group by grouping sets(
(a.period_id,a.company_name,a.province,a.new_flag),
(a.period_id,a.company_name,a.new_flag))
{code}
!right_result.png!

*{color:#ff0000}But if we add one or more grouping the query result (count 
distinct(user_id)) is wrong.{color}*
{code:sql}
group by grouping sets(
(a.period_id,a.company_name,a.province,a.new_flag),
(a.period_id,a.company_name,a.new_flag),
(a.period_id,a.company_name))
{code}
full sql:
{code:sql}
select period_id,
       case grouping(a.company_name) when 1 then 'ALL' else a.company_name end 
company_name,
       case grouping(a.province) when 1 then 'ALL' else a.province end province,
       case grouping(a.new_flag) when 1 then 'ALL' else a.new_flag end new_flag,
       count(distinct user_id) as index2  --'users'
from (
select monthid as period_id,
       case company_id when '1' then 'xx1' when '2' then 'xx2'
       end as company_name,
       province_name as province,
       case new_user_flag when '1' then '新增' when '2' then '存量' when '3' then 
'存量' end as new_flag,
       user_id
from rpt_culture_order_m
where monthid between '202005' and '{202005}' and order_flag=2 and company_id=4
) a 
group by grouping sets(
(a.period_id,a.company_name,a.province,a.new_flag),
(a.period_id,a.company_name,a.new_flag),
(a.period_id,a.company_name)
)
{code}
 
!count_distinct_grouping_sets_wrong_result.png!

The result of '新增‘ (column :INDEX2) should be 3170752 instead of 34808840 which 
is equal with the result of 'all'.
Further, we have test that when change the aggregation from count distinct to 
count or sum the result is right.


> query with count distinct and grouping sets get wrong result
> ------------------------------------------------------------
>
>                 Key: KYLIN-4582
>                 URL: https://issues.apache.org/jira/browse/KYLIN-4582
>             Project: Kylin
>          Issue Type: Bug
>          Components: Query Engine
>    Affects Versions: v2.5.2
>         Environment: hdp 2.6
>            Reporter: YaoChen
>            Priority: Critical
>         Attachments: count_distinct_grouping_sets_wrong_result.png, 
> right_result.png
>
>
> Query result is correct when grouping have two sets as below:
> {code:sql}
> group by grouping sets(
> (a.period_id,a.company_name,a.province,a.new_flag),
> (a.period_id,a.company_name,a.new_flag))
> {code}
> full sql:
> {code:sql}
> select period_id,
>        case grouping(a.company_name) when 1 then 'ALL' else a.company_name 
> end company_name,
>        case grouping(a.province) when 1 then 'ALL' else a.province end 
> province,
>        case grouping(a.new_flag) when 1 then 'ALL' else a.new_flag end 
> new_flag,
>        count(distinct user_id) as index2  --'users'
> from (
> select monthid as period_id,
>        case company_id when '1' then 'xx1' when '2' then 'xx2'
>        end as company_name,
>        province_name as province,
>        case new_user_flag when '1' then '新增' when '2' then '存量' when '3' then 
> '存量' end as new_flag,
>        user_id
> from rpt_culture_order_m
> where monthid between '202005' and '{202005}' and order_flag=2 and 
> company_id=4
> ) a 
> group by grouping sets(
> (a.period_id,a.company_name,a.province,a.new_flag),
> (a.period_id,a.company_name,a.new_flag))
> {code}
> !right_result.png|width=843,height=547!
> *{color:#ff0000}But if we add one or more grouping the query result (count 
> distinct(user_id)) is wrong.{color}*
> {code:sql}
> group by grouping sets(
> (a.period_id,a.company_name,a.province,a.new_flag),
> (a.period_id,a.company_name,a.new_flag),
> (a.period_id,a.company_name))
> {code}
> full sql:
> {code:sql}
> select period_id,
>        case grouping(a.company_name) when 1 then 'ALL' else a.company_name 
> end company_name,
>        case grouping(a.province) when 1 then 'ALL' else a.province end 
> province,
>        case grouping(a.new_flag) when 1 then 'ALL' else a.new_flag end 
> new_flag,
>        count(distinct user_id) as index2  --'users'
> from (
> select monthid as period_id,
>        case company_id when '1' then 'xx1' when '2' then 'xx2'
>        end as company_name,
>        province_name as province,
>        case new_user_flag when '1' then '新增' when '2' then '存量' when '3' then 
> '存量' end as new_flag,
>        user_id
> from rpt_culture_order_m
> where monthid between '202005' and '{202005}' and order_flag=2 and 
> company_id=4
> ) a 
> group by grouping sets(
> (a.period_id,a.company_name,a.province,a.new_flag),
> (a.period_id,a.company_name,a.new_flag),
> (a.period_id,a.company_name)
> )
> {code}
>  
>  !count_distinct_grouping_sets_wrong_result.png!
> The result of '新增‘ (column :INDEX2) should be 3170752 instead of 34808840 
> which is equal with the result of 'all'.
>  Further, we have test that when change the aggregation from count distinct 
> to count or sum the result is right.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to