[jira] [Updated] (HIVE-12530) Merge join in mutiple subsquence join and a mapjoin in it in mr model

2018-07-26 Thread BELUGA BEHR (JIRA)


 [ 
https://issues.apache.org/jira/browse/HIVE-12530?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

BELUGA BEHR updated HIVE-12530:
---
Description: 
sample hql:
{code:sql}
select  A.state_date, 
   A.customer, 
   A.channel_2,
   A.id,
   A.pid,
   A.type,
   A.pv,
   A.uv,
   A.visits,
   if(C.stay_visits is null,0,C.stay_visits) as stay_visits,
   A.stay_time,
   if(B.bounce is null,0,B.bounce) as bounce
 from
 (select a.state_date, 
a.customer, 
b.url as channel_2,
b.id,
b.pid,
b.type,
count(1) as pv,
count(distinct a.gid) uv,
count(distinct a.session_id) as visits,
sum(a.stay_time) as stay_time
   from   
   ( select state_date, 
   customer, 
   gid,
   session_id,
   ep,
   stay_time
from bdi_fact.mid_pageview_dt0
where l_date ='$v_date'
  )a
  join
  (select l_date as state_date ,
  url,
  id,
  pid,
  type,
  cid
   from bdi_fact.frequency_channel
   where l_date ='$v_date'
   and type ='2'
   and dr='0'
  )b
   on  a.customer=b.cid  
   where a.ep  rlike b.url
   group by a.state_date, a.customer, b.url,b.id,b.pid,b.type
   )A
  
left outer join
   (   select 
   c.state_date ,
   c.customer ,
   d.url as channel_2,
   d.id,
   sum(pagedepth) as bounce
from
  ( select 
  t1.state_date ,
  t1.customer ,
  t1.session_id,
  t1.ep,
  t2.pagedepth
from   
 ( select 
 state_date ,
 customer ,
 session_id,
 exit_url as ep
  from ods.mid_session_enter_exit_dt0
  where l_date ='$v_date'
  )t1
 join
  ( select 
state_date ,
customer ,
session_id,
pagedepth
from ods.mid_session_action_dt0
where l_date ='$v_date'
and  pagedepth='1'
  )t2
 on t1.customer=t2.customer
 and t1.session_id=t2.session_id
   )c
   join
   (select *
   from bdi_fact.frequency_channel
   where l_date ='$v_date'
   and type ='2'
   and dr='0'
   )d
   on c.customer=d.cid
   where c.ep  rlike d.url
   group by  c.state_date,c.customer,d.url,d.id
 )B
 on 
 A.customer=B.customer
 and A.channel_2=B.channel_2 
 and A.id=B.id
  left outer join
 ( 
 select e.state_date, 
e.customer, 
f.url as channel_2,
f.id,
f.pid,
f.type,
count(distinct e.session_id) as stay_visits
   from   
   ( select state_date, 
   customer, 
   gid,
   session_id,
   ep,
   stay_time
from bdi_fact.mid_pageview_dt0
where l_date ='$v_date'
  )e
  join
  (select l_date as state_date,
  url,
  id,
  pid,
  type,
  cid
   from bdi_fact.frequency_channel
   where l_date ='$v_date'
   and type ='2'
   and dr='0'
  )f
   on  e.customer=f.cid  
   where e.ep  rlike f.url
   and e.stay_time is not null
   and e.stay_time <>'0'
   group by e.state_date, e.customer, 

[jira] [Updated] (HIVE-12530) Merge join in mutiple subsquence join and a mapjoin in it in mr model

2015-12-22 Thread Thejas M Nair (JIRA)

 [ 
https://issues.apache.org/jira/browse/HIVE-12530?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Thejas M Nair updated HIVE-12530:
-
Fix Version/s: (was: 2.00)

> Merge join in mutiple subsquence join and a mapjoin in it in mr model
> -
>
> Key: HIVE-12530
> URL: https://issues.apache.org/jira/browse/HIVE-12530
> Project: Hive
>  Issue Type: Bug
>  Components: CLI
>Affects Versions: 1.2.1
>Reporter: Feng Yuan
>
> sample hql:
> select  A.state_date, 
>A.customer, 
>A.channel_2,
>A.id,
>A.pid,
>A.type,
>A.pv,
>A.uv,
>A.visits,
>if(C.stay_visits is null,0,C.stay_visits) as stay_visits,
>A.stay_time,
>if(B.bounce is null,0,B.bounce) as bounce
>  from
>  (select a.state_date, 
> a.customer, 
> b.url as channel_2,
> b.id,
> b.pid,
> b.type,
> count(1) as pv,
> count(distinct a.gid) uv,
> count(distinct a.session_id) as visits,
> sum(a.stay_time) as stay_time
>from   
>( select state_date, 
>customer, 
>gid,
>session_id,
>ep,
>stay_time
> from bdi_fact.mid_pageview_dt0
> where l_date ='$v_date'
>   )a
>   join
>   (select l_date as state_date ,
>   url,
>   id,
>   pid,
>   type,
>   cid
>from bdi_fact.frequency_channel
>where l_date ='$v_date'
>and type ='2'
>and dr='0'
>   )b
>on  a.customer=b.cid  
>where a.ep  rlike b.url
>group by a.state_date, a.customer, b.url,b.id,b.pid,b.type
>)A
>   
> left outer join
>(   select 
>c.state_date ,
>c.customer ,
>d.url as channel_2,
>d.id,
>sum(pagedepth) as bounce
> from
>   ( select 
>   t1.state_date ,
>   t1.customer ,
>   t1.session_id,
>   t1.ep,
>   t2.pagedepth
> from   
>  ( select 
>  state_date ,
>  customer ,
>  session_id,
>  exit_url as ep
>   from ods.mid_session_enter_exit_dt0
>   where l_date ='$v_date'
>   )t1
>  join
>   ( select 
> state_date ,
> customer ,
> session_id,
> pagedepth
> from ods.mid_session_action_dt0
> where l_date ='$v_date'
> and  pagedepth='1'
>   )t2
>  on t1.customer=t2.customer
>  and t1.session_id=t2.session_id
>)c
>join
>(select *
>from bdi_fact.frequency_channel
>where l_date ='$v_date'
>and type ='2'
>and dr='0'
>)d
>on c.customer=d.cid
>where c.ep  rlike d.url
>group by  c.state_date,c.customer,d.url,d.id
>  )B
>  on 
>  A.customer=B.customer
>  and A.channel_2=B.channel_2 
>  and A.id=B.id
>   left outer join
>  ( 
>  select e.state_date, 
> e.customer, 
> f.url as channel_2,
> f.id,
> f.pid,
> f.type,
> count(distinct e.session_id) as stay_visits
>from   
>( select state_date, 
>customer, 
>gid,
>session_id,
>ep,
>stay_time
> from bdi_fact.mid_pageview_dt0
> where l_date ='$v_date'
>