PR #23403 opened by Jun Zhao (mypopydev)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23403
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23403.patch

Emit ASS {\k} centisecond overrides when burning VTT karaoke cues with
the subtitles filter.  Out-of-range, non-monotonic, and malformed
inline timestamps are ignored; cues without valid timestamps are
unchanged.

The ASS {\k} karaoke tags only approximate the timing of WebVTT timed
text; ::cue(:past) / ::cue(:future) styling is not reproduced.

Fixes: ticket #23289
Signed-off-by: Jun Zhao <[email protected]>






>From 705250a38a57383f115a08e218a041d0cd878786 Mon Sep 17 00:00:00 2001
From: Jun Zhao <[email protected]>
Date: Mon, 8 Jun 2026 00:53:26 +0800
Subject: [PATCH] avcodec/webvttdec: map WebVTT cue timestamps to ASS {\k} tags

Emit ASS {\k} centisecond overrides when burning VTT karaoke cues with
the subtitles filter.  Out-of-range, non-monotonic, and malformed
inline timestamps are ignored; cues without valid timestamps are
unchanged.

The ASS {\k} karaoke tags only approximate the timing of WebVTT timed
text; ::cue(:past) / ::cue(:future) styling is not reproduced.

Fixes: ticket #23289
Signed-off-by: Jun Zhao <[email protected]>
---
 libavcodec/webvttdec.c | 77 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
index 90675bc660..89eb9e7fdf 100644
--- a/libavcodec/webvttdec.c
+++ b/libavcodec/webvttdec.c
@@ -48,9 +48,61 @@ static const struct {
     {"u", "{\\u1}"}, {"/u", "{\\u0}"},
 };
 
-static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
+/* Parse a WebVTT timestamp (HH:MM:SS.mmm or MM:SS.mmm) at the start of buf.
+ * Hours take 1-8 digits, minutes/seconds exactly 2 and the fraction exactly 3;
+ * a trailing digit, ':' or '.' is an error.  Returns milliseconds or -1. */
+static int64_t parse_webvtt_timestamp(const char *buf)
+{
+    int h = 0, m = 0, s = 0, ms = 0, n = 0;
+
+    if (sscanf(buf, "%8d:%2d:%2d.%3d%n", &h, &m, &s, &ms, &n) != 4) {
+        h = 0; /* a failed match may have stored the minutes here */
+        if (sscanf(buf, "%2d:%2d.%3d%n", &m, &s, &ms, &n) != 3)
+            return -1;
+    }
+    /* sscanf is lax about widths and signs, so verify the layout by hand */
+    if (n < 9 || buf[n - 4] != '.' || buf[n - 7] != ':' ||
+        (n != 9 && (n < 11 || buf[n - 10] != ':')) ||
+        strspn(buf, "0123456789:.") != (size_t)n || m > 59 || s > 59)
+        return -1;
+    return (int64_t)h * 3600000 + m * 60000 + s * 1000 + ms;
+}
+
+static void append_k_cs(AVBPrint *buf, int64_t ms)
+{
+    if (ms > 0) /* skip empty spans; otherwise round ms to centiseconds */
+        av_bprintf(buf, "{\\k%"PRId64"}", (ms + 5) / 10);
+}
+
+/* Validate a cue timestamp tag body of len bytes: it must contain only
+ * digits/colons/periods, be parseable, lie strictly within (cue_start,
+ * cue_end) and after prev_ts (if >= 0).  Returns 1 and sets *ts_out, or 0. */
+static int read_cue_timestamp(const char *body, int len,
+                              int64_t cue_start, int64_t cue_end,
+                              int64_t prev_ts, int64_t *ts_out)
+{
+    int64_t ts;
+
+    if (len < 1 || !av_isdigit(body[0]) ||
+        strspn(body, "0123456789:.") != (size_t)len)
+        return 0;
+
+    ts = parse_webvtt_timestamp(body);
+    /* ts < 0 is a parse failure; a negative cue_start must not hide it */
+    if (ts < 0 || ts <= cue_start || ts >= cue_end)
+        return 0;
+    if (prev_ts >= 0 && ts <= prev_ts)
+        return 0;
+
+    *ts_out = ts;
+    return 1;
+}
+
+static int webvtt_event_to_ass(AVBPrint *buf, const char *p,
+                               int64_t cue_start_ms, int64_t cue_end_ms)
 {
     int i, again = 0;
+    int64_t prev_ts = -1, ts;
 
     while (*p) {
         if (*p == '<') {
@@ -59,6 +111,19 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
             if (!tag_end)
                 break;
             len = tag_end - p + 1;
+
+            /* cue timestamp: emit {\k} for elapsed ms since cue_start
+             * or prev_ts; following text uses this timing */
+            if (len > 2 &&
+                read_cue_timestamp(p + 1, (int)(len - 2),
+                                   cue_start_ms, cue_end_ms, prev_ts, &ts)) {
+                append_k_cs(buf, ts - (prev_ts >= 0 ? prev_ts : cue_start_ms));
+                prev_ts = ts;
+                p += len;
+                again = 1;
+                continue;
+            }
+
             for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
                 const char *from = webvtt_valid_tags[i].from;
                 if(!strncmp(p + 1, from, strlen(from))) {
@@ -91,6 +156,11 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
             av_bprint_chars(buf, *p, 1);
         p++;
     }
+
+    /* trailing segment: last valid ts to cue end */
+    if (prev_ts >= 0 && cue_end_ms > FFMAX(prev_ts, cue_start_ms))
+        append_k_cs(buf, cue_end_ms - FFMAX(prev_ts, cue_start_ms));
+
     return 0;
 }
 
@@ -103,7 +173,10 @@ static int webvtt_decode_frame(AVCodecContext *avctx, AVSubtitle *sub,
     AVBPrint buf;
 
     av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
-    if (ptr && avpkt->size > 0 && !webvtt_event_to_ass(&buf, ptr))
+    if (ptr && avpkt->size > 0 &&
+        !webvtt_event_to_ass(&buf, ptr,
+                             avpkt->pts,
+                             avpkt->pts + avpkt->duration))
         ret = ff_ass_add_rect(sub, buf.str, s->readorder++, 0, NULL, NULL);
     av_bprint_finalize(&buf, NULL);
     if (ret < 0)
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to