From 5bd4a6f56231c3160be6c2d322f5d1081d7b3622 Mon Sep 17 00:00:00 2001
From: wang-bin <wbsecg1@gmail.com>
Date: Tue, 17 Dec 2024 23:21:57 +0800
Subject: [PATCH] matroskadec: support S_TEXT/WEBVTT

fix ticket #5641
---
 libavformat/matroska.c    |   1 +
 libavformat/matroskadec.c | 102 ++++++++++++++++++++++----------------
 2 files changed, 60 insertions(+), 43 deletions(-)

diff --git a/libavformat/matroska.c b/libavformat/matroska.c
index d0ecfbeb6a..23c4ad6949 100644
--- a/libavformat/matroska.c
+++ b/libavformat/matroska.c
@@ -63,6 +63,7 @@ const CodecTags ff_mkv_codec_tags[]={
     {"D_WEBVTT/CAPTIONS"    , AV_CODEC_ID_WEBVTT},
     {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT},
     {"D_WEBVTT/METADATA"    , AV_CODEC_ID_WEBVTT},
+    {"S_TEXT/WEBVTT"        , AV_CODEC_ID_WEBVTT},
 
     {"S_TEXT/UTF8"      , AV_CODEC_ID_SUBRIP},
     {"S_TEXT/UTF8"      , AV_CODEC_ID_TEXT},
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 0e150f9138..11af2cface 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -3761,68 +3761,83 @@ static int matroska_parse_prores(MatroskaTrack *track,
     return 0;
 }
 
+static int matroska_webvtt_read_line(uint8_t** begin, uint8_t* end)
+{
+    uint8_t *p = *begin;
+    int len = -1;
+
+    while (p < end) {
+        if (*p == '\r' || *p == '\n') {
+            len = p - *begin;
+            if (*p == '\r')
+                p++;
+            break;
+        }
+        p++;
+    }
+
+    if (p >= end || *p != '\n')
+        return AVERROR_INVALIDDATA;
+    p++;
+    *begin = p;
+    return len;
+}
+
 static int matroska_parse_webvtt(MatroskaDemuxContext *matroska,
                                  MatroskaTrack *track,
                                  AVStream *st,
                                  uint8_t *data, int data_len,
+                                 MatroskaBlockMore *blockmore, int nb_blockmore,
                                  uint64_t timecode,
                                  uint64_t duration,
                                  int64_t pos)
 {
     AVPacket *pkt = matroska->pkt;
     uint8_t *id, *settings, *text, *buf;
-    int id_len, settings_len, text_len;
+    int id_len = -1, settings_len = -1, text_len = -1;
     uint8_t *p, *q;
     int err;
+    const int webm_style = !strncmp(track->codec_id, "D_WEBVTT/", 9);
 
     if (data_len <= 0)
         return AVERROR_INVALIDDATA;
 
-    p = data;
-    q = data + data_len;
-
-    id = p;
-    id_len = -1;
-    while (p < q) {
-        if (*p == '\r' || *p == '\n') {
-            id_len = p - id;
-            if (*p == '\r')
-                p++;
-            break;
+    if (webm_style) {
+        p = data;
+        q = data + data_len;
+        id = p;
+        id_len = matroska_webvtt_read_line(&p, q);
+        if (id_len < 0)
+            return AVERROR_INVALIDDATA;
+        settings = p;
+        settings_len = matroska_webvtt_read_line(&p, q);
+        if (settings_len < 0)
+            return AVERROR_INVALIDDATA;
+        text = p;
+        text_len = data + data_len - p;
+        while (text_len > 0) {
+            const int len = text_len - 1;
+            const uint8_t c = p[len];
+            if (c != '\r' && c != '\n')
+                break;
+            text_len = len;
         }
-        p++;
-    }
-
-    if (p >= q || *p != '\n')
-        return AVERROR_INVALIDDATA;
-    p++;
-
-    settings = p;
-    settings_len = -1;
-    while (p < q) {
-        if (*p == '\r' || *p == '\n') {
-            settings_len = p - settings;
-            if (*p == '\r')
-                p++;
-            break;
+    } else {
+        if (nb_blockmore > 0) {
+            p = blockmore->additional.data;
+            q = p + blockmore->additional.size;
+            settings = p;
+            settings_len = matroska_webvtt_read_line(&p, q);
+            if (settings_len < 0)
+                return AVERROR_INVALIDDATA;
+            id = p;
+            id_len = matroska_webvtt_read_line(&p, q);
+            if (id_len < 0)
+                return AVERROR_INVALIDDATA;
         }
-        p++;
-    }
-
-    if (p >= q || *p != '\n')
-        return AVERROR_INVALIDDATA;
-    p++;
-
-    text = p;
-    text_len = q - p;
-    while (text_len > 0) {
-        const int len = text_len - 1;
-        const uint8_t c = p[len];
-        if (c != '\r' && c != '\n')
-            break;
-        text_len = len;
+        text = data;
+        text_len = data_len;
     }
-
     if (text_len <= 0)
         return AVERROR_INVALIDDATA;
 
@@ -4215,6 +4230,7 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf
         } else if (st->codecpar->codec_id == AV_CODEC_ID_WEBVTT) {
             res = matroska_parse_webvtt(matroska, track, st,
                                         out_data, out_size,
+                                        blockmore, nb_blockmore,
                                         timecode, lace_duration,
                                         pos);
             if (!buf)
-- 
2.39.5 (Apple Git-154)

