PR #23447 opened by James Almer (jamrial)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23447
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23447.patch

When a demuxer reports the last packet with a duration smaller than the real
coded duration, this information is not relayed to the decoder, which will
happily output all the trimming samples anyway.
Fix this by ensuring we export discard padding information as side data, as
shown with the mov demuxer in the last commit.


>From 65048d526d0c227c31602e10c609080e39466969 Mon Sep 17 00:00:00 2001
From: James Almer <[email protected]>
Date: Wed, 10 Jun 2026 18:11:58 -0300
Subject: [PATCH 1/4] avcodec/mpeg4audio: add a frame_length field to
 MPEG4AudioConfig

Will be useful to get fixed frame sizes outside decoders.

Signed-off-by: James Almer <[email protected]>
---
 libavcodec/mpeg4audio.c | 47 +++++++++++++++++++++++++++++++++++++++++
 libavcodec/mpeg4audio.h |  1 +
 2 files changed, 48 insertions(+)

diff --git a/libavcodec/mpeg4audio.c b/libavcodec/mpeg4audio.c
index fbd2a8f811..0430568b63 100644
--- a/libavcodec/mpeg4audio.c
+++ b/libavcodec/mpeg4audio.c
@@ -133,6 +133,53 @@ int ff_mpeg4audio_get_config_gb(MPEG4AudioConfig *c, GetBitContext *gb,
         if (ret < 0)
             return ret;
     }
+    switch (c->object_type) {
+    case AOT_AAC_MAIN:
+    case AOT_AAC_LC:
+    case AOT_AAC_SSR:
+    case AOT_AAC_LTP:
+    case AOT_ER_AAC_LC:
+        c->frame_length_short = get_bits1(gb);
+        c->frame_length = c->frame_length_short ? 960 : 1024;
+        break;
+    case AOT_ER_AAC_LD:
+    case AOT_ER_AAC_ELD:
+        c->frame_length_short = get_bits1(gb);
+        c->frame_length = c->frame_length_short ? 480 : 512;
+        break;
+    case AOT_USAC: {
+        int core_sbr_frame_len_idx, sbr_ratio;
+        int ratio_mult, ratio_dec, frame_length;
+        if (get_bits(gb, 5) == 0x1f) /* usacSamplingFrequencyIndex */ {
+            skip_bits(gb, 24); /* usacSamplingFrequency */
+        }
+        core_sbr_frame_len_idx = get_bits(gb, 3);
+        c->frame_length_short = core_sbr_frame_len_idx == 0 ||
+                                core_sbr_frame_len_idx == 2;
+        sbr_ratio = core_sbr_frame_len_idx == 2 ? 2 :
+                    core_sbr_frame_len_idx == 3 ? 3 :
+                    core_sbr_frame_len_idx == 4 ? 1 :
+                    0;
+
+        if (sbr_ratio == 2) {
+            ratio_mult = 8;
+            ratio_dec = 3;
+        } else if (sbr_ratio == 3) {
+            ratio_mult = 2;
+            ratio_dec = 1;
+        } else if (sbr_ratio == 4) {
+            ratio_mult = 4;
+            ratio_dec = 1;
+        } else {
+            ratio_mult = 1;
+            ratio_dec = 1;
+        }
+
+        frame_length = c->frame_length_short ? 768 : 1024;
+        c->frame_length = (frame_length * ratio_mult) / ratio_dec;
+        break;
+    }
+    }
 
     if (c->ext_object_type != AOT_SBR && sync_extension) {
         while (get_bits_left(gb) > 15) {
diff --git a/libavcodec/mpeg4audio.h b/libavcodec/mpeg4audio.h
index 0819e48a42..089d6abf9f 100644
--- a/libavcodec/mpeg4audio.h
+++ b/libavcodec/mpeg4audio.h
@@ -39,6 +39,7 @@ typedef struct MPEG4AudioConfig {
     int channels;
     int ps;  ///< -1 implicit, 1 presence
     int frame_length_short;
+    int frame_length; ///< derived value
 } MPEG4AudioConfig;
 
 extern const int     ff_mpeg4audio_sample_rates[16];
-- 
2.52.0


>From 1c43c13c5ba8d7437e6267c95b3ed5d31adc76fc Mon Sep 17 00:00:00 2001
From: James Almer <[email protected]>
Date: Wed, 10 Jun 2026 18:13:26 -0300
Subject: [PATCH 2/4] avformat/isom: export codecpar frame_size

Signed-off-by: James Almer <[email protected]>
---
 libavformat/isom.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/libavformat/isom.c b/libavformat/isom.c
index 29171fea40..8a1128ac35 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -370,10 +370,23 @@ int ff_mp4_read_dec_config_descr(void *logctx, AVStream *st, AVIOContext *pb)
                 st->codecpar->sample_rate = cfg.ext_sample_rate;
             else
                 st->codecpar->sample_rate = cfg.sample_rate;
+            switch (cfg.object_type) {
+            case AOT_AAC_MAIN:
+            case AOT_AAC_LC:
+            case AOT_AAC_SSR:
+            case AOT_AAC_LTP:
+            case AOT_ER_AAC_LC:
+            case AOT_ER_AAC_LD:
+            case AOT_ER_AAC_ELD:
+            case AOT_USAC:
+                st->codecpar->frame_size = cfg.frame_length;
+                break;
+            }
             av_log(logctx, AV_LOG_TRACE, "mp4a config channels %d obj %d ext 
obj %d "
-                    "sample rate %d ext sample rate %d\n", cfg.channels,
+                    "sample rate %d ext sample rate %d frame_length %d\n", 
cfg.channels,
                     cfg.object_type, cfg.ext_object_type,
-                    cfg.sample_rate, cfg.ext_sample_rate);
+                    cfg.sample_rate, cfg.ext_sample_rate,
+                    cfg.frame_length);
             if (!(st->codecpar->codec_id = ff_codec_get_id(mp4_audio_types,
                                                         cfg.object_type)))
                 st->codecpar->codec_id = AV_CODEC_ID_AAC;
-- 
2.52.0


>From 341d25cfab3d73f1a4be9b8b6767a465d7b80b50 Mon Sep 17 00:00:00 2001
From: James Almer <[email protected]>
Date: Wed, 10 Jun 2026 18:19:09 -0300
Subject: [PATCH 3/4] avformat/demux: discard trimming samples in codecs with
 fixed frame size

When a demuxer reports the last packet with a duration smaller than the real
coded duration, this information is not relayed to the decoder, which will
happily output all the trimming samples anyway.
Fix that by ensuring we export discard padding information as side data.

Signed-off-by: James Almer <[email protected]>
---
 libavformat/demux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/demux.c b/libavformat/demux.c
index 55085210de..516f286e19 100644
--- a/libavformat/demux.c
+++ b/libavformat/demux.c
@@ -1536,7 +1536,7 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
         if (sti->first_discard_sample && pkt->pts != AV_NOPTS_VALUE) {
             int64_t pts = pkt->pts - (is_relative(pkt->pts) ? RELATIVE_TS_BASE 
: 0);
             int64_t sample = ts_to_samples(st, pts);
-            int64_t duration = ts_to_samples(st, pkt->duration);
+            int64_t duration = FFMAX(st->codecpar->frame_size, 
ts_to_samples(st, pkt->duration));
             int64_t end_sample = sample + duration;
             if (duration > 0 && end_sample >= sti->first_discard_sample &&
                 sample < sti->last_discard_sample)
-- 
2.52.0


>From 76baf179bed0bfd5601cfb0aaac44d5e4c262d19 Mon Sep 17 00:00:00 2001
From: James Almer <[email protected]>
Date: Wed, 10 Jun 2026 18:28:09 -0300
Subject: [PATCH 4/4] avformat/mov: export information about the last actual
 sample in a stream

This way the generic demux code can calculate how many trimming samples should
be discarded.

Signed-off-by: James Almer <[email protected]>
---
 libavformat/mov.c                                  | 6 ++++++
 tests/fate/aac.mak                                 | 2 ++
 tests/ref/fate/audiomatch-faac-16000-mono-lc-m4a   | 2 +-
 tests/ref/fate/audiomatch-faac-16000-stereo-lc-m4a | 2 +-
 tests/ref/fate/audiomatch-faac-44100-mono-lc-m4a   | 2 +-
 tests/ref/fate/audiomatch-faac-44100-stereo-lc-m4a | 2 +-
 tests/ref/fate/copy-psp                            | 2 +-
 tests/ref/fate/gaplessenc-itunes-to-ipod-aac       | 4 ++--
 tests/ref/fate/gaplessenc-pcm-to-mov-aac           | 4 ++--
 tests/ref/fate/generic-tags-remux-mov              | 2 +-
 tests/ref/fate/mov-aac-2048-priming                | 2 +-
 11 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 997a92dc0b..8bde179ba4 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -11620,6 +11620,12 @@ static int mov_finalize_packet(AVFormatContext *s, AVStream *st, AVIndexEntry *s
         pkt->pts = pkt->dts;
     }
 
+    if (sc->stts_count && (sc->tts_index == sc->tts_count - 1)) {
+        ffstream(st)->first_discard_sample = av_rescale(pkt->pts, 
st->time_base.num * st->codecpar->sample_rate,
+                                                        st->time_base.den) + 
pkt->duration;
+        ffstream(st)->last_discard_sample = st->duration;
+    }
+
     if (sc->tts_data && sc->tts_index < sc->tts_count) {
         /* update tts context */
         sc->tts_sample++;
diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak
index 8a7f1da567..504b7fcffc 100644
--- a/tests/fate/aac.mak
+++ b/tests/fate/aac.mak
@@ -65,10 +65,12 @@ fate-aac-ap05_48: REF = $(SAMPLES)/aac/ap05_48.s16
 FATE_AAC += fate-aac-fd_2_c1_ms_0x01
 fate-aac-fd_2_c1_ms_0x01: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/Fd_2_c1_Ms_0x01.mp4
 fate-aac-fd_2_c1_ms_0x01: REF = $(SAMPLES)/aac/Fd_2_c1_Ms_0x01.s16
+fate-aac-fd_2_c1_ms_0x01: SIZE_TOLERANCE = 3052
 
 FATE_AAC += fate-aac-fd_2_c1_ms_0x04
 fate-aac-fd_2_c1_ms_0x04: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/Fd_2_c1_Ms_0x04.mp4
 fate-aac-fd_2_c1_ms_0x04: REF = $(SAMPLES)/aac/Fd_2_c1_Ms_0x04.s16
+fate-aac-fd_2_c1_ms_0x04: SIZE_TOLERANCE = 3436
 
 FATE_AAC += fate-aac-er_ad6000np_44_ep0
 fate-aac-er_ad6000np_44_ep0: CMD = pcm -i 
$(TARGET_SAMPLES)/aac/er_ad6000np_44_ep0.mp4
diff --git a/tests/ref/fate/audiomatch-faac-16000-mono-lc-m4a 
b/tests/ref/fate/audiomatch-faac-16000-mono-lc-m4a
index b2615dfc83..8204a116c9 100644
--- a/tests/ref/fate/audiomatch-faac-16000-mono-lc-m4a
+++ b/tests/ref/fate/audiomatch-faac-16000-mono-lc-m4a
@@ -1 +1 @@
-presig: 0 postsig:768 c: 1.0011 lenerr:768
+presig: 0 postsig:0 c: 1.0011 lenerr:0
diff --git a/tests/ref/fate/audiomatch-faac-16000-stereo-lc-m4a 
b/tests/ref/fate/audiomatch-faac-16000-stereo-lc-m4a
index b0fa495fa5..8204a116c9 100644
--- a/tests/ref/fate/audiomatch-faac-16000-stereo-lc-m4a
+++ b/tests/ref/fate/audiomatch-faac-16000-stereo-lc-m4a
@@ -1 +1 @@
-presig: 0 postsig:1536 c: 1.0011 lenerr:1536
+presig: 0 postsig:0 c: 1.0011 lenerr:0
diff --git a/tests/ref/fate/audiomatch-faac-44100-mono-lc-m4a 
b/tests/ref/fate/audiomatch-faac-44100-mono-lc-m4a
index 66faeb696c..8d544fbba8 100644
--- a/tests/ref/fate/audiomatch-faac-44100-mono-lc-m4a
+++ b/tests/ref/fate/audiomatch-faac-44100-mono-lc-m4a
@@ -1 +1 @@
-presig: 0 postsig:888 c: 0.9882 lenerr:888
+presig: 0 postsig:0 c: 0.9882 lenerr:0
diff --git a/tests/ref/fate/audiomatch-faac-44100-stereo-lc-m4a 
b/tests/ref/fate/audiomatch-faac-44100-stereo-lc-m4a
index c40a48e1b9..8d544fbba8 100644
--- a/tests/ref/fate/audiomatch-faac-44100-stereo-lc-m4a
+++ b/tests/ref/fate/audiomatch-faac-44100-stereo-lc-m4a
@@ -1 +1 @@
-presig: 0 postsig:1776 c: 0.9882 lenerr:1776
+presig: 0 postsig:0 c: 0.9882 lenerr:0
diff --git a/tests/ref/fate/copy-psp b/tests/ref/fate/copy-psp
index 977bef5c3e..c04e7088f5 100644
--- a/tests/ref/fate/copy-psp
+++ b/tests/ref/fate/copy-psp
@@ -262,7 +262,7 @@
 0,     288288,     288288,     3003,    15613, 0x8a293998, F=0x0
 1,     154624,     154624,     1025,      234, 0xa08f77cc
 0,     291291,     291291,     3003,    12489, 0xbfba1313, F=0x0
-1,     155649,     155649,     1023,      231, 0xab257042
+1,     155649,     155649,     1023,      231, 0xab257042, S=1, Skip Samples,  
     10, 0x00060001
 0,     294294,     294294,     3003,    18231, 0xaaaa9157, F=0x0
 0,     297297,     297297,     3003,    19204, 0x18c54ec1, F=0x0
 0,     300300,     300300,     3003,    19047, 0x6cb3e900, F=0x0
diff --git a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac 
b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
index a8da904c32..6c92c64521 100644
--- a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
+++ b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
@@ -22,7 +22,7 @@ packet|pts=98304|dts=98304|duration=1024|flags=K__
 packet|pts=99328|dts=99328|duration=1024|flags=K__
 packet|pts=100352|dts=100352|duration=1024|flags=K__
 packet|pts=101376|dts=101376|duration=1024|flags=K__
-packet|pts=102400|dts=102400|duration=960|flags=K__
+packet|pts=102400|dts=102400|duration=960|flags=K__|
 stream|nb_read_packets=102
 frame|pts=0|pkt_dts=0|best_effort_timestamp=0|nb_samples=1024
 frame|pts=1024|pkt_dts=1024|best_effort_timestamp=1024|nb_samples=1024
@@ -39,5 +39,5 @@ 
frame|pts=98304|pkt_dts=98304|best_effort_timestamp=98304|nb_samples=1024
 frame|pts=99328|pkt_dts=99328|best_effort_timestamp=99328|nb_samples=1024
 frame|pts=100352|pkt_dts=100352|best_effort_timestamp=100352|nb_samples=1024
 frame|pts=101376|pkt_dts=101376|best_effort_timestamp=101376|nb_samples=1024
-frame|pts=102400|pkt_dts=102400|best_effort_timestamp=102400|nb_samples=1024
+frame|pts=102400|pkt_dts=102400|best_effort_timestamp=102400|nb_samples=960
 stream|nb_read_frames=101
diff --git a/tests/ref/fate/gaplessenc-pcm-to-mov-aac 
b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
index 09ffaed58d..7a6edc7e70 100644
--- a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
+++ b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
@@ -22,7 +22,7 @@ packet|pts=524288|dts=524288|duration=1024|flags=K__
 packet|pts=525312|dts=525312|duration=1024|flags=K__
 packet|pts=526336|dts=526336|duration=1024|flags=K__
 packet|pts=527360|dts=527360|duration=1024|flags=K__
-packet|pts=528384|dts=528384|duration=816|flags=K__
+packet|pts=528384|dts=528384|duration=816|flags=K__|
 stream|nb_read_packets=518
 frame|pts=0|pkt_dts=0|best_effort_timestamp=0|nb_samples=1024
 frame|pts=1024|pkt_dts=1024|best_effort_timestamp=1024|nb_samples=1024
@@ -39,5 +39,5 @@ 
frame|pts=524288|pkt_dts=524288|best_effort_timestamp=524288|nb_samples=1024
 frame|pts=525312|pkt_dts=525312|best_effort_timestamp=525312|nb_samples=1024
 frame|pts=526336|pkt_dts=526336|best_effort_timestamp=526336|nb_samples=1024
 frame|pts=527360|pkt_dts=527360|best_effort_timestamp=527360|nb_samples=1024
-frame|pts=528384|pkt_dts=528384|best_effort_timestamp=528384|nb_samples=1024
+frame|pts=528384|pkt_dts=528384|best_effort_timestamp=528384|nb_samples=816
 stream|nb_read_frames=517
diff --git a/tests/ref/fate/generic-tags-remux-mov 
b/tests/ref/fate/generic-tags-remux-mov
index 41dd43fcbf..8a171eef31 100644
--- a/tests/ref/fate/generic-tags-remux-mov
+++ b/tests/ref/fate/generic-tags-remux-mov
@@ -50,7 +50,7 @@ e08290a2eaf72d1565f4a77a5b9093f8 
*tests/data/fate/generic-tags-remux-mov.mp4
 0,      40960,      40960,     1024,      192, 0x72736037
 0,      41984,      41984,     1024,      204, 0xb1c26c14
 0,      43008,      43008,     1024,      252, 0x2b818d52
-0,      44032,      44032,       68,        5, 0x03c001be
+0,      44032,      44032,       68,        5, 0x03c001be, S=1, Skip Samples,  
     10, 0x047700bf
 [FORMAT]
 TAG:major_brand=isom
 TAG:minor_version=512
diff --git a/tests/ref/fate/mov-aac-2048-priming 
b/tests/ref/fate/mov-aac-2048-priming
index feea5e36eb..6bbc9e20f8 100644
--- a/tests/ref/fate/mov-aac-2048-priming
+++ b/tests/ref/fate/mov-aac-2048-priming
@@ -214,4 +214,4 @@ 
packet|codec_type=audio|stream_index=0|pts=215040|pts_time=4.876190|dts=215040|d
 
packet|codec_type=audio|stream_index=0|pts=216064|pts_time=4.899410|dts=216064|dts_time=4.899410|duration=1024|duration_time=0.023220|size=203|pos=42900|flags=K__
 
packet|codec_type=audio|stream_index=0|pts=217088|pts_time=4.922630|dts=217088|dts_time=4.922630|duration=1024|duration_time=0.023220|size=198|pos=43103|flags=K__
 
packet|codec_type=audio|stream_index=0|pts=218112|pts_time=4.945850|dts=218112|dts_time=4.945850|duration=1024|duration_time=0.023220|size=284|pos=43301|flags=K__
-packet|codec_type=audio|stream_index=0|pts=219136|pts_time=4.969070|dts=219136|dts_time=4.969070|duration=340|duration_time=0.007710|size=5|pos=43585|flags=K__
+packet|codec_type=audio|stream_index=0|pts=219136|pts_time=4.969070|dts=219136|dts_time=4.969070|duration=340|duration_time=0.007710|size=5|pos=43585|flags=K__|side_datum/skip_samples:side_data_type=Skip
 
Samples|side_datum/skip_samples:skip_samples=0|side_datum/skip_samples:discard_padding=684|side_datum/skip_samples:skip_reason=0|side_datum/skip_samples:discard_reason=0
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to