PR #23085 opened by vigneshvg URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23085 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23085.patch
SMPTE 2094 App5 metadata in MP4 will be carried in an it35 track which
is an auxiliary track that is associated with the main video track via
a cdsc or a rndr reference.

Support parsing of such tracks and attach the metadata to the
corresponding video packet's side data.

Signed-off-by: Vignesh Venkat <[email protected]>

>From 52896f67b00dc3487f268d1afdc3f944363188a5 Mon Sep 17 00:00:00 2001
From: Vignesh Venkat <[email protected]>
Date: Wed, 29 Apr 2026 17:02:43 -0700
Subject: [PATCH] avformat/mov: Support it35 metadata track

SMPTE 2094 App5 metadata in MP4 will be carried in an it35 track which
is an auxiliary track that is associated with the main video track via
a cdsc or a rndr reference.

Support parsing of such tracks and attach the metadata to the
corresponding video packet's side data.

Signed-off-by: Vignesh Venkat <[email protected]>
---
 libavformat/isom.h |  19 +++++
 libavformat/mov.c  | 193 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 212 insertions(+)

diff --git a/libavformat/isom.h b/libavformat/isom.h
index d7e138585a..677d642253 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -32,6 +32,7 @@
 #include "libavutil/ambient_viewing_environment.h"
 #include "libavutil/spherical.h"
 #include "libavutil/stereo3d.h"
+#include "libavutil/hdr_dynamic_metadata.h"
 
 #include "avio.h"
 #include "internal.h"
@@ -170,6 +171,17 @@ typedef struct MOVIndexRange {
     int64_t end;
 } MOVIndexRange;
 
+/**
+ * One parsed sample of an it35 metadata track. pts and duration are derived
+ * from the index entries of that track.
+ */
+typedef struct MOVMetadataSample {
+    int64_t pts;                    ///< start of the interval the sample applies to
+    int64_t duration;               ///< length of that interval
+    AVDynamicHDRSmpte2094App5 *hdr; ///< parsed metadata, NULL if not available
+    size_t size;                    ///< size set by av_dynamic_hdr_smpte2094_app5_alloc()
+} MOVMetadataSample;
+
 typedef struct MOVStreamContext {
     AVIOContext *pb;
     int refcount;
@@ -284,6 +296,13 @@ typedef struct MOVStreamContext {
 
     struct IAMFDemuxContext *iamf;
     int iamf_stream_offset;
+    int is_it35;           ///< set for an 'it35' handler type or sample entry format
+    int tref_cdsc_id;      ///< track ID read from a 'cdsc' track reference
+    int tref_rndr_id;      ///< track ID read from a 'rndr' track reference
+
+    int is_smpte2094_app5; ///< the it35 sample entry identifies SMPTE 2094 App5
+    MOVMetadataSample *metadata_samples; ///< samples parsed by mov_read_it35_track()
+    int nb_metadata_samples;             ///< number of elements in metadata_samples
 } MOVStreamContext;
 
 typedef struct HEIFItemRef {
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 0d982e5a79..9dbd6c90ab 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -67,6 +67,8 @@
 #include "riff.h"
 #include "isom.h"
 #include "libavcodec/get_bits.h"
+#include "libavcodec/itut35.h"
+#include "libavutil/hdr_dynamic_metadata.h"
 #include "id3v1.h"
 #include "mov_chan.h"
 #include "replaygain.h"
@@ -815,6 +817,11 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         st->codecpar->codec_id = AV_CODEC_ID_MP2;
     else if ((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p')))
         st->codecpar->codec_type = AVMEDIA_TYPE_SUBTITLE;
+    else if (type == MKTAG('i','t','3','5')) {
+        MOVStreamContext *sc = st->priv_data;
+        st->codecpar->codec_type = AVMEDIA_TYPE_DATA;
+        sc->is_it35 = 1;
+    }
 
     avio_rb32(pb); /* component manufacture */
     avio_rb32(pb); /* component flags */
@@ -2571,6 +2578,41 @@ static int mov_read_sbas(MOVContext* c, AVIOContext* pb, MOVAtom atom)
     return 0;
 }
 
+/**
+ * Read a 'cdsc' or 'rndr' track reference and store the referenced track ID
+ * in the context of the stream that is currently being parsed.
+ *
+ * Only references to a single track are stored. References to several tracks
+ * are ignored with a warning rather than rejected, so that files using them
+ * remain readable.
+ */
+static int mov_read_tref_aux(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    MOVStreamContext *sc;
+    int64_t num_refs = atom.size / 4;
+    uint32_t ref_id;
+
+    if (c->fc->nb_streams < 1 || num_refs <= 0)
+        return 0;
+
+    if (num_refs > 1) {
+        av_log(c->fc, AV_LOG_WARNING,
+               "Ignoring '%s' track reference with %"PRId64" entries\n",
+               av_fourcc2str(atom.type), num_refs);
+        return 0;
+    }
+
+    sc = c->fc->streams[c->fc->nb_streams - 1]->priv_data;
+
+    ref_id = avio_rb32(pb);
+    if (atom.type == MKTAG('c','d','s','c'))
+        sc->tref_cdsc_id = ref_id;
+    else if (atom.type == MKTAG('r','n','d','r'))
+        sc->tref_rndr_id = ref_id;
+
+    return 0;
+}
+
 /**
  * An strf atom is a BITMAPINFOHEADER struct. This struct is 40 bytes itself,
  * but can have extradata appended at the end after the 40 bytes belonging
@@ -2943,6 +2985,29 @@ static int mov_parse_stsd_data(MOVContext *c, AVIOContext *pb,
                 }
             }
         }
+    } else if (st->codecpar->codec_tag == MKTAG('i','t','3','5')) {
+        /* The payload starts with a length-prefixed identifier whose first
+         * five bytes (country code, provider code, provider oriented code)
+         * tell whether the track carries SMPTE 2094 App5 metadata. */
+        int id_len = size > 0 ? avio_r8(pb) : -1;
+
+        if (id_len < 0 || id_len > size - 1) {
+            /* Never read past the end of the sample entry. */
+            av_log(c->fc, AV_LOG_WARNING, "Truncated it35 sample entry\n");
+            avio_skip(pb, FFMAX(size - 1, 0));
+        } else {
+            if (id_len >= 5) {
+                uint8_t id_bytes[5];
+                if (avio_read(pb, id_bytes, 5) == 5 &&
+                    id_bytes[0] == ITU_T_T35_COUNTRY_CODE_US &&
+                    AV_RB16(id_bytes + 1) == ITU_T_T35_PROVIDER_CODE_SMPTE &&
+                    AV_RB16(id_bytes + 3) == 0x0001) { // Provider oriented code
+                    sc->is_smpte2094_app5 = 1;
+                }
+                id_len -= 5;
+            }
+            avio_skip(pb, id_len);
+        }
     } else {
         /* other codec type, just skip (rtp, mp4s ...) */
         avio_skip(pb, size);
@@ -3142,6 +3207,10 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
         sc->dref_id= dref_id;
         sc->format = format;
 
+        if (format == MKTAG('i','t','3','5')) {
+            sc->is_it35 = 1;
+        }
+
         id = mov_codec_id(st, format);
 
         av_log(c->fc, AV_LOG_TRACE,
@@ -9560,6 +9629,8 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('p','a','s','p'), mov_read_pasp },
 { MKTAG('c','l','a','p'), mov_read_clap },
 { MKTAG('s','b','a','s'), mov_read_sbas },
+{ MKTAG('c','d','s','c'), mov_read_tref_aux },
+{ MKTAG('r','n','d','r'), mov_read_tref_aux },
 { MKTAG('s','i','d','x'), mov_read_sidx },
 { MKTAG('s','t','b','l'), mov_read_default },
 { MKTAG('s','t','c','o'), mov_read_stco },
@@ -10007,6 +10078,78 @@ static int mov_read_rtmd_track(AVFormatContext *s, AVStream *st)
     return 0;
 }
 
+/**
+ * Read and parse all samples of a SMPTE 2094 App5 it35 metadata track into
+ * sc->metadata_samples, for later use by mov_read_packet().
+ *
+ * The position of sc->pb is restored before returning, also on failure.
+ *
+ * @return 0 on success or if there is nothing to read, a negative AVERROR
+ *         code otherwise. Samples parsed before a failure are kept.
+ */
+static int mov_read_it35_track(AVFormatContext *s, AVStream *st)
+{
+    MOVStreamContext *sc = st->priv_data;
+    FFStream *const sti = ffstream(st);
+    int64_t cur_pos = avio_tell(sc->pb);
+    uint8_t *tmp_data = NULL;
+    int ret = 0;
+
+    if (!sc->is_smpte2094_app5 || !sti->nb_index_entries)
+        return 0;
+
+    sc->metadata_samples = av_calloc(sti->nb_index_entries, sizeof(*sc->metadata_samples));
+    if (!sc->metadata_samples)
+        return AVERROR(ENOMEM);
+    sc->nb_metadata_samples = sti->nb_index_entries;
+
+    for (int i = 0; i < sti->nb_index_entries; i++) {
+        const AVIndexEntry *e = &sti->index_entries[i];
+        MOVMetadataSample *m = &sc->metadata_samples[i];
+
+        m->pts = e->timestamp;
+        /* AVIndexEntry.min_distance is a keyframe distance, not a duration.
+         * A sample applies until the next one starts; the last sample has no
+         * successor, so sc->tts_data[0].duration is used for it. */
+        if (i + 1 < sti->nb_index_entries)
+            m->duration = sti->index_entries[i + 1].timestamp - e->timestamp;
+        else if (sc->tts_count > 0 && sc->tts_data)
+            m->duration = sc->tts_data[0].duration;
+
+        tmp_data = av_malloc(e->size);
+        if (!tmp_data) {
+            ret = AVERROR(ENOMEM);
+            goto end;
+        }
+
+        if (avio_seek(sc->pb, e->pos, SEEK_SET) != e->pos ||
+            avio_read(sc->pb, tmp_data, e->size) != e->size) {
+            av_log(s, AV_LOG_WARNING, "Failed to read it35 sample %d\n", i);
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        m->hdr = av_dynamic_hdr_smpte2094_app5_alloc(&m->size);
+        if (!m->hdr) {
+            ret = AVERROR(ENOMEM);
+            goto end;
+        }
+
+        if (av_dynamic_hdr_smpte2094_app5_from_t35(m->hdr, tmp_data, e->size) < 0) {
+            av_log(s, AV_LOG_WARNING, "Failed to parse it35 metadata in mov_read_it35_track\n");
+            av_freep(&m->hdr);
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+        av_freep(&tmp_data);
+    }
+
+end:
+    av_free(tmp_data);
+    avio_seek(sc->pb, cur_pos, SEEK_SET);
+    return ret;
+}
+
 static int mov_read_timecode_track(AVFormatContext *s, AVStream *st)
 {
     MOVStreamContext *sc = st->priv_data;
@@ -10126,6 +10269,10 @@ static void mov_free_stream_context(AVFormatContext *s, AVStream *st)
         ff_iamf_read_deinit(sc->iamf);
 #endif
     av_freep(&sc->iamf);
+    for (int i = 0; i < sc->nb_metadata_samples; i++)
+        av_freep(&sc->metadata_samples[i].hdr);
+    av_freep(&sc->metadata_samples);
+    sc->nb_metadata_samples = 0;
 }
 
 static int mov_read_close(AVFormatContext *s)
@@ -10958,6 +11105,8 @@ static int mov_read_header(AVFormatContext *s)
                 mov_read_timecode_track(s, s->streams[i]);
             } else if (s->streams[i]->codecpar->codec_tag == AV_RL32("rtmd")) {
                 mov_read_rtmd_track(s, s->streams[i]);
+            } else if (s->streams[i]->codecpar->codec_tag == AV_RL32("it35")) {
+                mov_read_it35_track(s, s->streams[i]);
             }
     }
 
@@ -11505,6 +11654,50 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
             }
             return ret;
         }
+
+        if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
+            AVStream *it35_st = NULL;
+
+            /* Find an App5 metadata track referencing this video stream. */
+            for (int k = 0; k < mov->fc->nb_streams; k++) {
+                AVStream *st_k = mov->fc->streams[k];
+                MOVStreamContext *sc_k = st_k->priv_data;
+                if (sc_k && sc_k->is_smpte2094_app5 &&
+                    (sc_k->tref_cdsc_id == st->id || sc_k->tref_rndr_id == st->id)) {
+                    it35_st = st_k;
+                    break;
+                }
+            }
+            if (it35_st) {
+                MOVStreamContext *it35_sc = it35_st->priv_data;
+                /* NOTE(review): this scans all metadata samples for every
+                 * video packet; consider a cheaper lookup for long files. */
+                for (int k = 0; k < it35_sc->nb_metadata_samples; k++) {
+                    const MOVMetadataSample *m = &it35_sc->metadata_samples[k];
+                    uint8_t *side_data;
+
+                    /* Each track has its own timescale (assumes the index
+                     * timestamps are in units of the stream time base), so
+                     * compare each timestamp in its own time base. The sample
+                     * applies when pts <= video timestamp < pts + duration. */
+                    if (!m->hdr ||
+                        av_compare_ts(sample->timestamp, st->time_base,
+                                      m->pts, it35_st->time_base) < 0 ||
+                        av_compare_ts(sample->timestamp, st->time_base,
+                                      m->pts + m->duration, it35_st->time_base) >= 0)
+                        continue;
+
+                    side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_DYNAMIC_HDR_SMPTE_2094_APP5, m->size);
+                    if (!side_data) {
+                        av_packet_unref(pkt);
+                        return AVERROR(ENOMEM);
+                    }
+                    memcpy(side_data, m->hdr, m->size);
+                    // If multiple metadata packets match the same video frame, simply use the first one.
+                    break;
+                }
+            }
+        }
+
 #if CONFIG_DV_DEMUXER
         if (mov->dv_demux && sc->dv_audio_container) {
             ret = avpriv_dv_produce_packet(mov->dv_demux, NULL, pkt->data, pkt->size, pkt->pos);
-- 
2.52.0
_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]
