Hi all:I was using spark sql on hive for doing some statistics. While using 
spark sql, I found that, sometimes, spark-sql got the incorrect results while 
select. And if I executed the same select statement again, it returned the 
different results which was correct. That is saying, sometime, with the same 
input and the same select logic, the select result is different. That is so 
weird!Is there any hints? Thanks.
val selectJoin =
      s"""
        | insert into table $tblLoginLogStatDay
        | select '$statDate' as date, '$USER_TYPE' as user_type, a.platform, 
a.channel, a.product_version,
        | coalesce(c.total_user_90,0), a.total_user, 
coalesce(b.one_time_startup, 0), a.total_startup, coalesce(f.dau,0), 
coalesce(d.wau,0), coalesce(e.mau,0)
        | from
        |     (select platform, channel, product_version, count(distinct 
$userType) as total_user, sum(login_cnt) as total_startup
        |         from $tblNewTable
        |         group by platform, channel, product_version) a
        |
        |     left outer join
        |
        |     (select platform, channel, product_version, count(distinct 
$userType) as one_time_startup
        |         from $tblNewTable
        |         where login_cnt = 1
        |         group by platform, channel, product_version) b
        |     on a. platform = b.platform and a.channel = b.channel and 
a.product_version = b.product_version
        |
        |     left outer join
        |
        |     (select platform, channel, product_version, count(distinct 
$userType) as total_user_90
        |         from $tblNewTable90
        |         group by platform, channel, product_version) c
        |     on a.platform = c.platform and a.channel = c.channel and 
a.product_version = c.product_version
        |
        |     left outer join
        |
        |     (select platform, channel, product_version, count(distinct 
$userType) as wau
        |         from $tblNewTable90
        |         where  from_unixtimestamp_to_date(last_time) >= 
date_sub('$statDate', 6)
        |         group by platform, channel, product_version) d
        |     on a.platform = d.platform and a.channel = d.channel and 
a.product_version = d.product_version
        |
        |     left outer join
        |
        |     (select platform, channel, product_version, count(distinct 
$userType) as mau
        |         from $tblNewTable90
        |         where  from_unixtimestamp_to_date(last_time) >= 
date_sub('$statDate', 29)
        |         group by platform, channel, product_version) e
        |     on a.platform = e.platform and a.channel = e.channel and 
a.product_version = e.product_version
        |
        |     left outer join
        |
        |     (select platform, channel, product_version, count(distinct 
$userType) as dau
        |        from $tblNewTable90
        |        where from_unixtimestamp_to_date(last_time) = '$statDate'
        |        group by platform, channel, product_version) f
        |     on a.platform = f.platform and a.channel = f.channel and 
a.product_version = f.product_version
      """.stripMargin

spark version: 1.2.0
hive version: 0.10.0-cdh4.2.1
​                                         

Reply via email to