nimesh1601 commented on issue #9316:
URL:
https://github.com/apache/incubator-gluten/issues/9316#issuecomment-2839873280
@zhouyuan faced the same issue
Sample sql
WITH cdl_summary AS ( SELECT user_id, CASE WHEN <TRACKING_LABEL_COL> =
'store_front' THEN 'storefront' ELSE <TRACKING_LABEL_COL> END AS source,
SUM(CASE WHEN name IN (<IMPRESSION_EVENTS>) AND datestr BETWEEN :START_7D
AND :END_DATE THEN 1 ELSE 0 END) AS impression_count_7d, SUM(CASE WHEN name IN
(<IMPRESSION_EVENTS>) AND datestr BETWEEN :START_14D AND :END_DATE THEN 1
ELSE 0 END) AS impression_count_14d, SUM(CASE WHEN name IN
(<IMPRESSION_EVENTS>) AND datestr BETWEEN :START_28D AND :END_DATE THEN 1
ELSE 0 END) AS impression_count_28d, SUM(CASE WHEN name IN
(<IMPRESSION_EVENTS>) AND datestr BETWEEN :START_56D AND :END_DATE THEN 1
ELSE 0 END) AS impression_count_56d, SUM(CASE WHEN name IN
(<IMPRESSION_EVENTS>) AND datestr BETWEEN :START_112D AND :END_DATE THEN 1
ELSE 0 END) AS impression_count_112d, SUM(CASE WHEN name IN (<CLICK_EVENTS>)
AND datestr BETWEEN :START_7D AND :END_DATE THEN 1 ELSE 0 END) AS
click_count_7d, SUM(CASE WHEN name IN (<
CLICK_EVENTS>) AND datestr BETWEEN :START_14D AND :END_DATE THEN 1 ELSE
0 END) AS click_count_14d, SUM(CASE WHEN name IN (<CLICK_EVENTS>) AND
datestr BETWEEN :START_28D AND :END_DATE THEN 1 ELSE 0 END) AS
click_count_28d, SUM(CASE WHEN name IN (<CLICK_EVENTS>) AND datestr
BETWEEN :START_56D AND :END_DATE THEN 1 ELSE 0 END) AS click_count_56d,
SUM(CASE WHEN name IN (<CLICK_EVENTS>) AND datestr BETWEEN :START_112D AND
:END_DATE THEN 1 ELSE 0 END) AS click_count_112d, SUM(CASE WHEN name IN
(<ORDER_EVENTS>) AND datestr BETWEEN :START_7D AND :END_DATE THEN 1 ELSE
0 END) AS order_count_7d, SUM(CASE WHEN name IN (<ORDER_EVENTS>) AND
datestr BETWEEN :START_14D AND :END_DATE THEN 1 ELSE 0 END) AS
order_count_14d, SUM(CASE WHEN name IN (<ORDER_EVENTS>) AND datestr
BETWEEN :START_28D AND :END_DATE THEN 1 ELSE 0 END) AS order_count_28d,
SUM(CASE WHEN name IN (<ORDER_EVENTS>) AND datestr BETWEEN :START_56D AND
:END_DATE THEN 1 ELSE 0 END) AS orde
r_count_56d, SUM(CASE WHEN name IN (<ORDER_EVENTS>) AND datestr BETWEEN
:START_112D AND :END_DATE THEN 1 ELSE 0 END) AS order_count_112d, AVG(CASE
WHEN name = 'marketplace_scrolled' AND datestr BETWEEN :START_7D AND
:END_DATE THEN 0 WHEN name IN
('feed_item_card_scrolled','feed_item_dish_card_scrolled') AND datestr
BETWEEN :START_7D AND :END_DATE THEN feed.display_item_position
ELSE NULL END) AS avg_scroll_depth_7d, MAX(CASE WHEN name IN
(<INTERACTION_EVENTS>) AND datestr BETWEEN :START_112D AND :END_DATE
THEN epoch_ms ELSE NULL END) AS latest_interaction_time FROM
<SCHEMA_CDL>.<TABLE_CDL> cdl WHERE TRUE AND datestr BETWEEN :START_112D AND
:END_DATE AND name IN (<IMPRESSION_EVENTS>, <CLICK_EVENTS>, <ORDER_EVENTS>) AND
COALESCE(<SESSION_ID_COL>, user_id) IS NOT NULL AND is_first_event = TRUE AND
user_id IS NOT NULL AND user_id <> '' AND <FEED_CONTEXT_COL> IN
('home','vertical','allstores','all_stores') GROUP BY 1,2
), xlb_summary AS ( SELECT user_id, CASE WHEN <TRACKING_LABEL_COL> =
'store_front' THEN 'storefront' ELSE <TRACKING_LABEL_COL> END AS source,
MAX(CASE WHEN name IN (<INTERACTION_EVENTS>) AND datestr BETWEEN
:START_112D AND :END_DATE THEN epoch_ms ELSE NULL END) AS
latest_interaction_time FROM <SCHEMA_XLB>.<TABLE_XLB> xlb WHERE TRUE GROUP BY
1,2 ), data_summary AS ( SELECT COALESCE(cdl.user_id, xlb.user_id) AS user_id,
COALESCE(cdl.source, xlb.source) AS source, COALESCE(cdl.impression_count_7d,
0) + COALESCE(xlb.impression_count_7d, 0) AS impression_count_7d,
GREATEST(cdl.latest_interaction_time, xlb.latest_interaction_time) AS
latest_interaction_time FROM cdl_summary cdl FULL OUTER JOIN xlb_summary xlb ON
cdl.user_id = xlb.user_id AND cdl.source = xlb.source ), summary_agg AS (
SELECT user_id, SUM(impression_count_7d) AS total_impression_7d,
SUM(order_count_112d) AS total_order_112d FROM data_summary GROUP BY 1 )
INSERT OVERWRITE TABLE <SCHEMA_TARGET>.<
TABLE_TARGET> PARTITION (datestr) SELECT CONCAT(d.user_id, '|', d.source) AS
uuid, d.user_id, d.source, d.impression_count_7d, agg.total_impression_7d,
d.latest_interaction_time, :END_DATE AS datestr FROM data_summary d JOIN
summary_agg agg ON d.user_id = agg.user_id WHERE d.source IS NOT NULL;
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]