mingren created HIVE-12760:
------------------------------

             Summary: union all in hive returns incorrect results.
                 Key: HIVE-12760
                 URL: https://issues.apache.org/jira/browse/HIVE-12760
             Project: Hive
          Issue Type: Bug
          Components: Hive
    Affects Versions: 1.2.1
         Environment: Hadoop 2.7.1
Hive 1.2.1
            Reporter: mingren


The issue can be recreated with following steps

1.create table

{quote}

CREATE TABLE `union_case`(
  `shopid` bigint,
  `platform` string,
  `source` string,
  `pv` bigint,
  `uv` bigint,
  `pv_all` bigint,
  `uv_all` bigint,
  `pv_rate` float,
  `uv_rate` float 
)
PARTITIONED BY (
  `visit_date` string)
ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.RCFileInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
LOCATION
  'hdfs://mgjcluster/apps/hive/warehouse/union_case'

{quote}

2.insert the following data

{quote}

insert into table union_case partition(visit_date='2015-12-24') values 

(102448,'wap','t1',2,1,10,10,0.5,0.5),
(102448,'pc','t1',2,1,10,10,0.5,0.5),
(102448,'app','t1',2,1,10,10,0.5,0.5),
(102443,'wap','t2',2,1,10,10,0.5,0.5),
(102443,'pc','t2',2,1,10,10,0.5,0.5),
(102443,'app','t2',2,1,10,10,0.5,0.5)

{quote}

3.execute the following query

{quote}

select platform,count(1) from

(select 
      shopid,
      1 as platform,
      source,
      pv,
      uv,
      pv_all,
      uv_all,
      pv_rate,
      uv_rate
  from union_case
  where visit_date = '2015-12-24' and platform = 'pc'

  union all

  select 
      a.shopid,
      2 as platform,
      a.source,
      a.pv as pv,
      a.uv as uv,
      b.pv_all as pv_all,
      b.uv_all as uv_all,
      a.pv/b.pv_all as pv_rate,
      a.uv/b.uv_all as uv_rate
  from (
      select a.shopid,a.source,sum(a.pv) as pv,sum(a.uv) as uv 
      from union_case a 
      where a.visit_date = '2015-12-24' and platform in ('wap','app') 
      group by a.shopid, a.source) a 

  left join (
      select 
      shopid,
      sum(pv) as pv_all,
      sum(uv) as uv_all
    from union_case
    where visit_date = '2015-12-24' and platform in ('wap','app')
    group by shopid
  ) b 
  on a.shopid = b.shopid

  ) x

group by platform

limit 5

{quote}

4. output of quering

{quote}

NULL    2
2   2

{quote}

**obviously, result is wrong.**



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to