nimesh1601 commented on issue #9316:
URL: 
https://github.com/apache/incubator-gluten/issues/9316#issuecomment-2839873280

   @zhouyuan faced the same issue 
   Sample sql
   WITH  cdl_summary AS ( SELECT user_id, CASE WHEN <TRACKING_LABEL_COL> = 
'store_front' THEN 'storefront' ELSE <TRACKING_LABEL_COL> END AS source,  
SUM(CASE WHEN name IN (<IMPRESSION_EVENTS>)      AND datestr BETWEEN :START_7D  
 AND :END_DATE THEN 1 ELSE 0 END) AS impression_count_7d, SUM(CASE WHEN name IN 
(<IMPRESSION_EVENTS>)      AND datestr BETWEEN :START_14D  AND :END_DATE THEN 1 
ELSE 0 END) AS impression_count_14d, SUM(CASE WHEN name IN 
(<IMPRESSION_EVENTS>)      AND datestr BETWEEN :START_28D  AND :END_DATE THEN 1 
ELSE 0 END) AS impression_count_28d, SUM(CASE WHEN name IN 
(<IMPRESSION_EVENTS>)      AND datestr BETWEEN :START_56D  AND :END_DATE THEN 1 
ELSE 0 END) AS impression_count_56d, SUM(CASE WHEN name IN 
(<IMPRESSION_EVENTS>)      AND datestr BETWEEN :START_112D AND :END_DATE THEN 1 
ELSE 0 END) AS impression_count_112d,   SUM(CASE WHEN name IN (<CLICK_EVENTS>)  
    AND datestr BETWEEN :START_7D   AND :END_DATE THEN 1 ELSE 0 END) AS 
click_count_7d, SUM(CASE WHEN name IN (<
 CLICK_EVENTS>)      AND datestr BETWEEN :START_14D  AND :END_DATE THEN 1 ELSE 
0 END) AS click_count_14d, SUM(CASE WHEN name IN (<CLICK_EVENTS>)      AND 
datestr BETWEEN :START_28D  AND :END_DATE THEN 1 ELSE 0 END) AS 
click_count_28d, SUM(CASE WHEN name IN (<CLICK_EVENTS>)      AND datestr 
BETWEEN :START_56D  AND :END_DATE THEN 1 ELSE 0 END) AS click_count_56d, 
SUM(CASE WHEN name IN (<CLICK_EVENTS>)      AND datestr BETWEEN :START_112D AND 
:END_DATE THEN 1 ELSE 0 END) AS click_count_112d,   SUM(CASE WHEN name IN 
(<ORDER_EVENTS>)      AND datestr BETWEEN :START_7D   AND :END_DATE THEN 1 ELSE 
0 END) AS order_count_7d, SUM(CASE WHEN name IN (<ORDER_EVENTS>)      AND 
datestr BETWEEN :START_14D  AND :END_DATE THEN 1 ELSE 0 END) AS 
order_count_14d, SUM(CASE WHEN name IN (<ORDER_EVENTS>)      AND datestr 
BETWEEN :START_28D  AND :END_DATE THEN 1 ELSE 0 END) AS order_count_28d, 
SUM(CASE WHEN name IN (<ORDER_EVENTS>)      AND datestr BETWEEN :START_56D  AND 
:END_DATE THEN 1 ELSE 0 END) AS orde
 r_count_56d, SUM(CASE WHEN name IN (<ORDER_EVENTS>)      AND datestr BETWEEN 
:START_112D AND :END_DATE THEN 1 ELSE 0 END) AS order_count_112d,   AVG(CASE    
    WHEN name = 'marketplace_scrolled'        AND datestr BETWEEN :START_7D AND 
:END_DATE THEN 0       WHEN name IN 
('feed_item_card_scrolled','feed_item_dish_card_scrolled')        AND datestr 
BETWEEN :START_7D AND :END_DATE        THEN feed.display_item_position       
ELSE NULL     END) AS avg_scroll_depth_7d,     MAX(CASE WHEN name IN 
(<INTERACTION_EVENTS>)          AND datestr BETWEEN :START_112D AND :END_DATE   
   THEN epoch_ms ELSE NULL END) AS latest_interaction_time FROM 
<SCHEMA_CDL>.<TABLE_CDL> cdl WHERE TRUE AND datestr BETWEEN :START_112D AND 
:END_DATE AND name IN (<IMPRESSION_EVENTS>, <CLICK_EVENTS>, <ORDER_EVENTS>) AND 
COALESCE(<SESSION_ID_COL>, user_id) IS NOT NULL AND is_first_event = TRUE AND 
user_id IS NOT NULL AND user_id <> '' AND <FEED_CONTEXT_COL> IN 
('home','vertical','allstores','all_stores')  GROUP BY 1,2
  ),  xlb_summary AS ( SELECT user_id, CASE WHEN <TRACKING_LABEL_COL> = 
'store_front' THEN 'storefront' ELSE <TRACKING_LABEL_COL> END AS source,  
MAX(CASE WHEN name IN (<INTERACTION_EVENTS>)          AND datestr BETWEEN 
:START_112D AND :END_DATE      THEN epoch_ms ELSE NULL END) AS 
latest_interaction_time FROM <SCHEMA_XLB>.<TABLE_XLB> xlb WHERE TRUE  GROUP BY 
1,2 ),  data_summary AS ( SELECT COALESCE(cdl.user_id, xlb.user_id) AS user_id, 
COALESCE(cdl.source, xlb.source) AS source, COALESCE(cdl.impression_count_7d, 
0) + COALESCE(xlb.impression_count_7d, 0) AS impression_count_7d,  
GREATEST(cdl.latest_interaction_time, xlb.latest_interaction_time)   AS 
latest_interaction_time FROM cdl_summary cdl FULL OUTER JOIN xlb_summary xlb ON 
cdl.user_id = xlb.user_id AND cdl.source = xlb.source ),  summary_agg AS ( 
SELECT user_id, SUM(impression_count_7d) AS total_impression_7d,  
SUM(order_count_112d)     AS total_order_112d FROM data_summary GROUP BY 1 )  
INSERT OVERWRITE TABLE <SCHEMA_TARGET>.<
 TABLE_TARGET> PARTITION (datestr) SELECT CONCAT(d.user_id, '|', d.source) AS 
uuid, d.user_id, d.source, d.impression_count_7d,  agg.total_impression_7d,  
d.latest_interaction_time, :END_DATE AS datestr FROM data_summary d JOIN 
summary_agg agg ON d.user_id = agg.user_id WHERE d.source IS NOT NULL;
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to