The concrete SQL for options 2 and 3 is below.
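(For context, here is a minimal sketch of the definitions these statements assume. Table and column names are taken from the queries themselves; the connector settings, column types, and the raw timestamp column are placeholders and will certainly differ from the real DDL. The key assumption is that baidu_log_view exposes event_time as an event-time attribute with a watermark, since the TUMBLE windows cannot fire without one.)

-- Assumed sink table: one row per window / supply / dimension key / dimension value.
CREATE TABLE flink_sdk_stats (
  `time`        STRING,
  `supply_id`   STRING,
  `field_key`   STRING,
  `field_value` STRING,
  `pv`          BIGINT
) WITH (
  'connector' = '...'  -- whatever sink is actually used; placeholder
);

-- Assumed source: event_time is a computed event-time column with a watermark.
CREATE TABLE baidu_log (
  sid        STRING,
  d77        STRING, d79 STRING, d80 STRING, d81 STRING,
  d83        STRING, d84 STRING, d86 STRING, d107 STRING,
  ts         BIGINT,  -- epoch millis; placeholder name
  event_time AS TO_TIMESTAMP(FROM_UNIXTIME(ts / 1000)),
  WATERMARK FOR event_time AS event_time - INTERVAL '5' SECOND
) WITH (
  'connector' = '...'  -- e.g. kafka; placeholder
);

CREATE VIEW baidu_log_view AS
SELECT * FROM baidu_log;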
Option 2:
INSERT INTO flink_sdk_stats
(
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    sid AS `supply_id`,
    'd77' AS `field_key`,
    d77 AS `field_value`,
    count(1) AS `pv`
FROM baidu_log_view
GROUP BY sid, d77, TUMBLE(event_time, INTERVAL '5' MINUTE)
UNION ALL
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    sid AS `supply_id`,
    'd79' AS `field_key`,
    d79 AS `field_value`,
    count(1) AS `pv`
FROM baidu_log_view
GROUP BY sid, d79, TUMBLE(event_time, INTERVAL '5' MINUTE)
UNION ALL
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    sid AS `supply_id`,
    'd80' AS `field_key`,
    d80 AS `field_value`,
    count(1) AS `pv`
FROM baidu_log_view
GROUP BY sid, d80, TUMBLE(event_time, INTERVAL '5' MINUTE)
UNION ALL
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    sid AS `supply_id`,
    'd81' AS `field_key`,
    d81 AS `field_value`,
    count(1) AS `pv`
FROM baidu_log_view
GROUP BY sid, d81, TUMBLE(event_time, INTERVAL '5' MINUTE)
UNION ALL
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    sid AS `supply_id`,
    'd83' AS `field_key`,
    d83 AS `field_value`,
    count(1) AS `pv`
FROM baidu_log_view
GROUP BY sid, d83, TUMBLE(event_time, INTERVAL '5' MINUTE)
UNION ALL
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    sid AS `supply_id`,
    'd84' AS `field_key`,
    d84 AS `field_value`,
    count(1) AS `pv`
FROM baidu_log_view
GROUP BY sid, d84, TUMBLE(event_time, INTERVAL '5' MINUTE)
UNION ALL
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    sid AS `supply_id`,
    'd86' AS `field_key`,
    d86 AS `field_value`,
    count(1) AS `pv`
FROM baidu_log_view
GROUP BY sid, d86, TUMBLE(event_time, INTERVAL '5' MINUTE)
);
Option 3:
INSERT INTO flink_sdk_stats
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    `supply_id`,
    `field_key`,
    `field_value`,
    count(1) AS `pv`
FROM
(
    SELECT event_time, sid AS `supply_id`, 'd107' AS `field_key`, d107 AS `field_value` FROM baidu_log_view
    UNION ALL
    SELECT event_time, sid AS `supply_id`, 'd77' AS `field_key`, d77 AS `field_value` FROM baidu_log_view
    UNION ALL
    SELECT event_time, sid AS `supply_id`, 'd79' AS `field_key`, d79 AS `field_value` FROM baidu_log_view
    UNION ALL
    SELECT event_time, sid AS `supply_id`, 'd80' AS `field_key`, d80 AS `field_value` FROM baidu_log_view
    UNION ALL
    SELECT event_time, sid AS `supply_id`, 'd81' AS `field_key`, d81 AS `field_value` FROM baidu_log_view
    UNION ALL
    SELECT event_time, sid AS `supply_id`, 'd83' AS `field_key`, d83 AS `field_value` FROM baidu_log_view
    UNION ALL
    SELECT event_time, sid AS `supply_id`, 'd84' AS `field_key`, d84 AS `field_value` FROM baidu_log_view
    UNION ALL
    SELECT event_time, sid AS `supply_id`, 'd86' AS `field_key`, d86 AS `field_value` FROM baidu_log_view
)
GROUP BY
    `supply_id`, `field_key`, `field_value`, TUMBLE(event_time, INTERVAL '5' MINUTE);
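
(Option 1 from the quoted message below is not written out above; for completeness it would simply be one independent INSERT per key, along the lines of the following sketch, repeated for each dimension column:)

INSERT INTO flink_sdk_stats
SELECT
    DATE_FORMAT(TUMBLE_END(`event_time`, INTERVAL '5' MINUTE), 'yyyyMMddHHmm') AS `time`,
    sid AS `supply_id`,
    'd77' AS `field_key`,
    d77 AS `field_value`,
    count(1) AS `pv`
FROM baidu_log_view
GROUP BY sid, d77, TUMBLE(event_time, INTERVAL '5' MINUTE);
-- ...and a separate, identical INSERT for each of d79, d80, d81, d83, d84, d86.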
On Tue, Dec 15, 2020 at 10:48 PM, 赵一旦 <[email protected]> wrote:
>
> The requirement: for a given table, compute statistics over several key groups, e.g. key1 (xxx+yyy+ky1), key2 (xxx+yyy+ky2), and so on, where xxx+yyy are the fields shared by all groups. There are currently the following 3 candidate implementations.
> (1) Aggregate each key group separately and run a separate insert for each.
> (2) Aggregate each key group separately, union the results, then insert.
> (3) Select from the table once per key group, union the rows, then aggregate on the unified key, then insert.
> In the third option, the different columns ky1, ky2, ... are normalized through
>
> select 'ky1' as key_name, ky1 as key_value
> union
> select 'ky2' as key_name, ky2 as key_value
>
> into a single key column, and the final aggregation is done on (xxx+yyy+key_name+key_value).
>
> The problem found so far: in option 3, the window node never receives a watermark, so the windows never fire and nothing is computed.