On Fri, Apr 14, 2023 at 6:01 PM Roy Funderburk <royffm...@funderburk.us> wrote:
> Parsing and demuxing of DTS-UHD input files per ETSI TS 102 114 is added > as demuxer "dtsuhd". movenc supports DTS-UHD audio track. > > Signed-off-by: Roy Funderburk <roy.funderb...@xperi.com> > --- > Changelog | 1 + > configure | 1 + > doc/general_contents.texi | 1 + > libavcodec/Makefile | 1 + > libavcodec/codec_desc.c | 7 + > libavcodec/codec_id.h | 1 + > libavcodec/dtsuhd_common.c | 991 +++++++++++++++++++++++++++++++++++++ > libavcodec/dtsuhd_common.h | 84 ++++ > libavcodec/dtsuhd_parser.c | 141 ++++++ > libavcodec/parsers.c | 1 + > libavformat/Makefile | 1 + > libavformat/allformats.c | 1 + > libavformat/dtshddec.c | 2 +- > libavformat/dtsuhddec.c | 214 ++++++++ > libavformat/movenc.c | 32 ++ > libavformat/version.h | 2 +- > 16 files changed, 1479 insertions(+), 2 deletions(-) > create mode 100644 libavcodec/dtsuhd_common.c > create mode 100644 libavcodec/dtsuhd_common.h > create mode 100644 libavcodec/dtsuhd_parser.c > create mode 100644 libavformat/dtsuhddec.c > > diff --git a/Changelog b/Changelog > index a40f32c23f..f683b49bb2 100644 > --- a/Changelog > +++ b/Changelog > @@ -3,6 +3,7 @@ releases are sorted from youngest to oldest. 
> version <next>: > - libaribcaption decoder > +- DTS-UHD demuxer > version 6.0: > - Radiance HDR image support > diff --git a/configure b/configure > index 033db7442d..557821ceef 100755 > --- a/configure > +++ b/configure > @@ -3425,6 +3425,7 @@ dash_demuxer_deps="libxml2" > dirac_demuxer_select="dirac_parser" > dts_demuxer_select="dca_parser" > dtshd_demuxer_select="dca_parser" > +dtsuhd_demuxer_select="dtsuhd_parser" > dv_demuxer_select="dvprofile" > dv_muxer_select="dvprofile" > dxa_demuxer_select="riffdec" > diff --git a/doc/general_contents.texi b/doc/general_contents.texi > index 2eeebd847d..e1ba9c4597 100644 > --- a/doc/general_contents.texi > +++ b/doc/general_contents.texi > @@ -597,6 +597,7 @@ library: > @item raw DNxHD @tab X @tab X > @item raw DTS @tab X @tab X > @item raw DTS-HD @tab @tab X > +@item raw DTS-UHD @tab @tab > @item raw E-AC-3 @tab X @tab X > @item raw FLAC @tab X @tab X > @item raw GSM @tab @tab X > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index aa10fbfcf8..f57564e9eb 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -1155,6 +1155,7 @@ OBJS-$(CONFIG_DIRAC_PARSER) += > dirac_parser.o > OBJS-$(CONFIG_DNXHD_PARSER) += dnxhd_parser.o dnxhddata.o > OBJS-$(CONFIG_DOLBY_E_PARSER) += dolby_e_parser.o > dolby_e_parse.o > OBJS-$(CONFIG_DPX_PARSER) += dpx_parser.o > +OBJS-$(CONFIG_DTSUHD_PARSER) += dtsuhd_parser.o dtsuhd_common.o > OBJS-$(CONFIG_DVAUDIO_PARSER) += dvaudio_parser.o > OBJS-$(CONFIG_DVBSUB_PARSER) += dvbsub_parser.o > OBJS-$(CONFIG_DVD_NAV_PARSER) += dvd_nav_parser.o > diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c > index efdcb59bc9..a58315f46b 100644 > --- a/libavcodec/codec_desc.c > +++ b/libavcodec/codec_desc.c > @@ -3369,6 +3369,13 @@ static const AVCodecDescriptor codec_descriptors[] > = { > .long_name = NULL_IF_CONFIG_SMALL("RKA (RK Audio)"), > .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY | > AV_CODEC_PROP_LOSSLESS, > }, > + { > + .id = AV_CODEC_ID_DTSUHD, > + .type = 
AVMEDIA_TYPE_AUDIO, > + .name = "dtsuhd", > + .long_name = NULL_IF_CONFIG_SMALL("DTSUHD (DTS-UHD Audio > Format)"), > + .props = AV_CODEC_PROP_LOSSY, > + }, > /* subtitle codecs */ > { > diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h > index 64df9699f4..6d8b145ee3 100644 > --- a/libavcodec/codec_id.h > +++ b/libavcodec/codec_id.h > @@ -538,6 +538,7 @@ enum AVCodecID { > AV_CODEC_ID_FTR, > AV_CODEC_ID_WAVARC, > AV_CODEC_ID_RKA, > + AV_CODEC_ID_DTSUHD, > /* subtitle codecs */ > AV_CODEC_ID_FIRST_SUBTITLE = 0x17000, ///< A dummy ID > pointing at the start of subtitle codecs. > diff --git a/libavcodec/dtsuhd_common.c b/libavcodec/dtsuhd_common.c > new file mode 100644 > index 0000000000..110cb0c371 > --- /dev/null > +++ b/libavcodec/dtsuhd_common.c > @@ -0,0 +1,991 @@ > +/* > + * DTS-UHD common audio frame parsing code > + * Copyright (c) 2023 Xperi Corporation / DTS, Inc. > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +/** > + * @file > + * Parse DTS-UHD audio frame headers, report frame sizes and > configuration. 
> + * Specification: ETSI TS 103 491 V1.2.1 > + */ > + > +#include <string.h> > + > +#include "dtsuhd_common.h" > +#include "get_bits.h" > +#include "libavutil/channel_layout.h" > + > +#define DTSUHD_ALLOC_INCREMENT 16 > +#define DTSUHD_CHUNK_HEADER 16 > + > +enum RepType { > + REP_TYPE_CH_MASK_BASED, > + REP_TYPE_MTRX2D_CH_MASK_BASED, > + REP_TYPE_MTRX3D_CH_MASK_BASED, > + REP_TYPE_BINAURAL, > + REP_TYPE_AMBISONIC, > + REP_TYPE_AUDIO_TRACKS, > + REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF, > + REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF, > +}; > + > +typedef struct MDObject { > + int started; /* Object seen since last reset. */ > + int pres_index; > + int rep_type; > + int ch_activity_mask; > +} MDObject; > + > +typedef struct MD01 { > + GetBitContext gb; > + MDObject object[257]; /* object id max value is 256 */ > + int chunk_id; > + int object_list[256]; int object_list_count; > + int packets_acquired; > + int static_md_extracted; > + int static_md_packets; > + int static_md_packet_size; > + int static_md_update_flag; > + uint8_t *buf; int buf_bytes; /* temporary buffer to accumulate static > data */ > +} MD01; > + > +typedef struct NAVI { > + int bytes; > + int id; > + int index; > + int present; > +} NAVI; > + > +typedef struct UHDAudio { > + int mask; > + int selectable; > +} UHDAudio; > + > +typedef struct UHDChunk { > + int crc_flag; > + int bytes; > +} UHDChunk; > + > +struct DTSUHD { > + const uint8_t *data; int data_bytes; /* Original audio frame buffer. 
> */ > + GetBitContext gb; > + MD01 *md01; int md01_count; > + NAVI *navi; int navi_alloc, navi_count; > + UHDAudio audio[256]; > + UHDChunk *chunk; int chunk_alloc, chunk_count; > + int chunk_bytes; > + int clock_rate; > + int frame_bytes; > + int frame_duration; > + int frame_duration_code; > + int ftoc_bytes; > + int major_version; > + int num_audio_pres; > + int sample_rate; > + int sample_rate_mod; > + unsigned full_channel_mix_flag:1; > + unsigned interactive_obj_limits_present:1; > + unsigned is_sync_frame:1; > + unsigned saw_sync:1; > +}; > + > +/* Read from the MD01 buffer (if present), falling back to the frame > buffer */ > +static inline int get_bits_md01(DTSUHD *h, MD01 *md01, int bits) > +{ > + if (md01->buf) > + return get_bits(&md01->gb, bits); > + return get_bits(&h->gb, bits); > +} > + > +/* In the specification, the pseudo code defaults the 'add' parameter to > true. > + Table 7-30 shows passing an explicit false, most other calls do not > + pass the extractAndAdd parameter. 
> + > + Function based on code in Table 5-2 > +*/ > +static int get_bits_var(GetBitContext *gb, const uint8_t table[], int add) > +{ > + static const int bits_used[8] = { 1, 1, 1, 1, 2, 2, 3, 3 }; > + static const int index_table[8] = { 0, 0, 0, 0, 1, 1, 2, 3 }; > + int code = show_bits(gb, 3); /* value range is [0, 7] */ > + int i; > + int index = index_table[code]; > + int value = 0; > + > + skip_bits(gb, bits_used[code]); > + if (table[index] > 0) { > + if (add) { > + for (i = 0; i < index; i++) > + value += 1 << table[i]; > + } > + value += get_bits_long(gb, table[index]); > + } > + > + return value; > +} > + > +/* Implied by Table 6-2, MD01 chunk objects appended in for loop */ > +static MD01 *chunk_append_md01(DTSUHD *h, int id) > +{ > + int md01_alloc = h->md01_count + 1; > + if (av_reallocp_array(&h->md01, md01_alloc, sizeof(*h->md01))) > + return NULL; > + > + memset(h->md01 + h->md01_count, 0, sizeof(*h->md01)); > + h->md01[h->md01_count].chunk_id = id; > + return h->md01 + h->md01_count++; > +} > + > +/* Return existing MD01 chunk based on chunkID */ > +static MD01 *chunk_find_md01(DTSUHD *h, int id) > +{ > + int i; > + > + for (i = 0; i < h->md01_count; i++) > + if (id == h->md01[i].chunk_id) > + return h->md01 + i; > + > + return NULL; > +} > + > +/* Table 6-3 */ > +static void chunk_reset(DTSUHD *h) > +{ > + int i; > + > + for (i = 0; i < h->md01_count; i++) > + av_freep(&h->md01[i].buf); > + av_freep(&h->md01); > + h->md01_count = 0; > +} > + > +static MDObject *find_default_audio(DTSUHD *h) > +{ > + MDObject *object; > + int i, j; > + int obj_index = -1; > + > + for (i = 0; i < h->md01_count; i++) { > + for (j = 0; j < 257; j++) { > + object = h->md01[i].object + j; > + if (object->started && > h->audio[object->pres_index].selectable) { > + if (obj_index < 0 || (object->pres_index < > h->md01[i].object[obj_index].pres_index)) > + obj_index = j; > + } > + } > + if (obj_index >= 0) > + return h->md01[i].object + obj_index; > + } > + > + return NULL; > 
+} > + > +/* Save channel mask, count, and rep type to descriptor info. > + ETSI TS 103 491 Table 7-28 channel activity mask bits > + mapping and SCTE DVS 243-4 Rev. 0.2 DG X Table 4. Convert activity > mask and > + representation type to channel mask and channel counts. > +*/ > +static void extract_object_info(MDObject *object, DTSUHDDescriptorInfo > *info) > +{ > + int i; > + static const struct { > + uint32_t activity_mask; > + uint32_t channel_mask; // Mask as defined by ETSI TS 103 491 > + uint64_t ffmpeg_channel_mask; // Mask as defined in ffmpeg > + } activity_map[] = { > + // act mask | chan mask | ffmpeg channel mask > + { 0x000001, 0x00000001, AV_CH_FRONT_CENTER }, > + { 0x000002, 0x00000006, AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT }, > + { 0x000004, 0x00000018, AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT }, > + { 0x000008, 0x00000020, AV_CH_LOW_FREQUENCY }, > + { 0x000010, 0x00000040, AV_CH_BACK_CENTER }, > + { 0x000020, 0x0000A000, AV_CH_TOP_FRONT_LEFT | > AV_CH_TOP_FRONT_RIGHT }, > + { 0x000040, 0x00000180, AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT }, > + { 0x000080, 0x00004000, AV_CH_TOP_FRONT_CENTER }, > + { 0x000100, 0x00080000, AV_CH_TOP_CENTER }, > + { 0x000200, 0x00001800, AV_CH_FRONT_LEFT_OF_CENTER | > AV_CH_FRONT_RIGHT_OF_CENTER }, > + { 0x000400, 0x00060000, AV_CHAN_WIDE_LEFT | AV_CHAN_WIDE_RIGHT }, > + { 0x000800, 0x00000600, AV_CH_SURROUND_DIRECT_LEFT | > AV_CH_SURROUND_DIRECT_RIGHT }, > + { 0x001000, 0x00010000, AV_CH_LOW_FREQUENCY_2 }, > + { 0x002000, 0x00300000, AV_CH_TOP_SIDE_LEFT | > AV_CH_TOP_SIDE_RIGHT }, > + { 0x004000, 0x00400000, AV_CH_TOP_BACK_CENTER }, > + { 0x008000, 0x01800000, AV_CH_TOP_BACK_LEFT | > AV_CH_TOP_BACK_RIGHT }, > + { 0x010000, 0x02000000, AV_CH_BOTTOM_FRONT_CENTER }, > + { 0x020000, 0x0C000000, AV_CH_BOTTOM_FRONT_LEFT | > AV_CH_BOTTOM_FRONT_RIGHT }, > + { 0x140000, 0x30000000, AV_CH_TOP_FRONT_LEFT | > AV_CH_TOP_FRONT_RIGHT }, > + { 0x080000, 0xC0000000, AV_CH_TOP_BACK_LEFT | > AV_CH_TOP_BACK_RIGHT }, > + { 0 } // Terminator > + }; > 
+ > + if (object) { > + for (i = 0; activity_map[i].activity_mask; i++) { > + if (activity_map[i].activity_mask & object->ch_activity_mask) > { > + info->channel_mask |= activity_map[i].channel_mask; > + info->ffmpeg_channel_mask |= > activity_map[i].ffmpeg_channel_mask; > + } > + } > + info->channel_count = av_popcount(info->channel_mask); > + info->rep_type = object->rep_type; > + } > +} > + > +/* Assemble information for MP4 Sample Entry box. Sample Size is always > + 16 bits. The coding name is the name of the SampleEntry sub-box and is > + 'dtsx' unless the version of the bitstream is > 2. > + If DecoderProfile == 2, then MaxPayloadCode will be zero. > +*/ > +static void update_descriptor(DTSUHD *h, DTSUHDDescriptorInfo *info) > +{ > + static const char *coding_name[] = { "dtsx", "dtsy" }; > + > + memset(info, 0, sizeof(*info)); > + memcpy(info->coding_name, coding_name[h->major_version > 2], 5); > + extract_object_info(find_default_audio(h), info); > + info->base_sample_freq_code = h->sample_rate == 48000; > + info->decoder_profile_code = h->major_version - 2; > + info->frame_duration_code = h->frame_duration_code; > + info->max_payload_code = 0 + (h->major_version > 2); > + info->num_pres_code = h->num_audio_pres - 1; > + info->sample_rate = h->sample_rate; > + info->sample_rate_mod = h->sample_rate_mod; > + info->sample_size = 16; > + info->valid = 1; > +} > + > +/* Table 6-17 p47 */ > +static int parse_explicit_object_lists(DTSUHD *h, int mask, int index) > +{ > + GetBitContext *gb = &h->gb; > + int i; > + static const uint8_t table[4] = { 4, 8, 16, 32 }; > + > + for (i = 0; i < index; i++) { > + if ((mask >> i) & 0x01) { > + if (h->is_sync_frame || get_bits1(gb)) > + get_bits_var(gb, table, 1); > + } > + } > + > + return 0; > +} > + > +/* Table 6-15 p44, Table 6-16 p45 */ > +static int parse_aud_pres_params(DTSUHD *h) > +{ > + GetBitContext *gb = &h->gb; > + int audio; > + int i; > + int read_mask; > + static const uint8_t table[4] = { 0, 2, 4, 5 }; > + > 
+ if (h->is_sync_frame) { > + if (h->full_channel_mix_flag) > + h->num_audio_pres = 1; > + else > + h->num_audio_pres = get_bits_var(gb, table, 1) + 1; > + memset(h->audio, 0, sizeof(h->audio[0]) * h->num_audio_pres); > + } > + > + for (audio = 0; audio < h->num_audio_pres; audio++) { > + if (h->is_sync_frame) { > + if (h->full_channel_mix_flag) > + h->audio[audio].selectable = 1; > + else > + h->audio[audio].selectable = get_bits1(gb); > + } > + > + if (h->audio[audio].selectable) { > + if (h->is_sync_frame) { > + read_mask = (audio > 0) ? get_bits(gb, audio) : 0; > + h->audio[audio].mask = 0; > + for (i = 0; read_mask; i++, read_mask >>= 1) { > + if (read_mask & 0x01) > + h->audio[audio].mask |= get_bits1(gb) << i; > + } > + } > + > + if (parse_explicit_object_lists(h, h->audio[audio].mask, > audio)) > + return 1; > + } else { > + h->audio[audio].mask = 0; > + } > + } > + > + return 0; > +} > + > +/* Table 6-9 p 38 */ > +static int check_crc(DTSUHD *h, int bit, int bytes) > +{ > + GetBitContext gb; > + int i; > + static const uint16_t lookup[16] = { > + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, > + 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF > + }; > + uint16_t crc = 0xFFFF; > + > + init_get_bits(&gb, h->data, h->data_bytes * 8); > + skip_bits(&gb, bit); > + for (i = -bytes; i < bytes; i++) > + crc = (crc << 4) ^ lookup[(crc >> 12) ^ get_bits(&gb, 4)]; > + > + return crc != 0; > +} > + > +/* Table 6-12 p 40 */ > +static void decode_version(DTSUHD *h) > +{ > + GetBitContext *gb = &h->gb; > + int bits = get_bits1(gb) ? 
3 : 6; > + > + h->major_version = get_bits(gb, bits) + 2; > + skip_bits(gb, bits); > +} > + > +/* Table 6-12 p 40 */ > +static int parse_stream_params(DTSUHD *h) > +{ > + GetBitContext *gb = &h->gb; > + int has_ftoc_crc; > + static const uint32_t table_base_duration[4] = { 512, 480, 384, 0 }; > + static const uint32_t table_clock_rate[4] = { 32000, 44100, 48000, 0 > }; > + > + if (h->is_sync_frame) > + h->full_channel_mix_flag = get_bits1(gb); > + > + has_ftoc_crc = !h->full_channel_mix_flag || h->is_sync_frame; > + if (has_ftoc_crc && check_crc(h, 0, h->ftoc_bytes)) > + return 1; > + > + if (h->is_sync_frame) { > + if (h->full_channel_mix_flag) > + h->major_version = 2; > + else > + decode_version(h); > + > + h->frame_duration = table_base_duration[get_bits(gb, 2)]; > + h->frame_duration_code = get_bits(gb, 3); > + h->frame_duration *= (h->frame_duration_code + 1); > + h->clock_rate = table_clock_rate[get_bits(gb, 2)]; > + if (h->frame_duration == 0 || h->clock_rate == 0) > + return 1; /* bitstream error */ > + > + skip_bits(gb, 36 * get_bits1(gb)); /* bTimeStampPresent */ > + h->sample_rate_mod = get_bits(gb, 2); > + h->sample_rate = h->clock_rate * (1 << h->sample_rate_mod); > + > + if (h->full_channel_mix_flag) { > + h->interactive_obj_limits_present = 0; > + } else { > + skip_bits1(gb); /* reserved flag. */ > + h->interactive_obj_limits_present = get_bits1(gb); > + } > + } > + > + return 0; > +} > + > +/* Table 6-24 p52 */ > +static void navi_purge(DTSUHD *h) > +{ > + int i; > + > + for (i = 0; i < h->navi_count; i++) > + if (!h->navi[i].present) > + h->navi[i].bytes = 0; > +} > + > +/* Table 6-21 p50 */ > +static void navi_clear(DTSUHD *h) > +{ > + if (h->navi) > + memset(h->navi, 0, sizeof(h->navi[0]) * h->navi_count); > + h->navi_count = 0; > +} > + > +/* Table 6-22 p51 */ > +static void navi_clear_present(DTSUHD *h) > +{ > + int i; > + > + for (i = 0; i < h->navi_count; i++) > + h->navi[i].present = 0; > +} > + > +/* Table 6-23 p51. 
Return 0 on success, and the index is returned in > + the *listIndex parameter. > +*/ > +static int navi_find_index(DTSUHD *h, int desired_index, int *list_index) > +{ > + int avail_index = h->navi_count; > + int i; > + int navi_alloc; > + > + for (i = 0; i < h->navi_count; i++) { > + if (h->navi[i].index == desired_index) { > + *list_index = i; > + h->navi[i].present = 1; > + return 0; > + } > + > + if ((h->navi[i].present == 0) && (h->navi[i].bytes == 0) && > (avail_index > i)) > + avail_index = i; > + } > + > + if (avail_index >= h->navi_count) { > + if (h->navi_count >= h->navi_alloc) { > + navi_alloc = h->navi_count + DTSUHD_ALLOC_INCREMENT; > + if (av_reallocp_array(&h->navi, navi_alloc, sizeof(*h->navi))) > + return 1; > + h->navi_alloc = navi_alloc; > + } > + h->navi_count++; > + } > + > + *list_index = avail_index; > + h->navi[avail_index].bytes = 0; > + h->navi[avail_index].present = 1; > + h->navi[avail_index].id = 256; > + h->navi[avail_index].index = desired_index; > + > + return 0; > +} > + > +/* Table 6-20 p48 */ > +static int parse_chunk_navi(DTSUHD *h) > +{ > + GetBitContext *gb = &h->gb; > + int audio_chunks = 1; > + int bytes; > + int i; > + int id; > + int id_present; > + int index; > + int list_index; > + static const uint8_t table2468[4] = { 2, 4, 6, 8 }; > + static const uint8_t table_audio_chunk_sizes[4] = { 9, 11, 13, 16 }; > + static const uint8_t table_chunk_sizes[4] = { 6, 9, 12, 15 }; > + > + h->chunk_bytes = 0; > + if (h->full_channel_mix_flag) > + h->chunk_count = h->is_sync_frame; > + else > + h->chunk_count = get_bits_var(gb, table2468, 1); > + > + if (h->chunk_count >= h->chunk_alloc) { > + int chunk_alloc = h->chunk_count + DTSUHD_ALLOC_INCREMENT; > + if (av_reallocp_array(&h->chunk, chunk_alloc, sizeof(*h->chunk))) > + return 1; > + h->chunk_alloc = chunk_alloc; > + } > + > + for (i = 0; i < h->chunk_count; i++) { > + h->chunk_bytes += h->chunk[i].bytes = get_bits_var(gb, > table_chunk_sizes, 1); > + if (h->full_channel_mix_flag) 
> + h->chunk[i].crc_flag = 0; > + else > + h->chunk[i].crc_flag = get_bits1(gb); > + } > + > + if (!h->full_channel_mix_flag) > + audio_chunks = get_bits_var(gb, table2468, 1); > + > + if (h->is_sync_frame) > + navi_clear(h); > + else > + navi_clear_present(h); > + > + for (i = 0; i < audio_chunks; i++) { > + if (h->full_channel_mix_flag) > + index = 0; > + else > + index = get_bits_var(gb, table2468, 1); > + > + if (navi_find_index(h, index, &list_index)) > + return 1; > + > + if (h->is_sync_frame) > + id_present = 1; > + else if (h->full_channel_mix_flag) > + id_present = 0; > + else > + id_present = get_bits1(gb); > + > + if (id_present) { > + id = get_bits_var(gb, table2468, 1); > + h->navi[list_index].id = id; > + } > + > + bytes = get_bits_var(gb, table_audio_chunk_sizes, 1); > + h->chunk_bytes += bytes; > + h->navi[list_index].bytes = bytes; > + } > + > + navi_purge(h); > + > + return 0; > +} > + > + > +/* Table 6-6 */ > +static int parse_md_chunk_list(DTSUHD *h, MD01 *md01) > +{ > + GetBitContext *gb = &h->gb; > + const uint8_t table1[4] = { 3, 4, 6, 8 }; > + int i; > + > + if (h->full_channel_mix_flag) { > + md01->object_list_count = 1; > + md01->object_list[0] = 256; > + } else { > + md01->object_list_count = get_bits_var(gb, table1, 1); > + for (i = 0; i < md01->object_list_count; i++) > + md01->object_list[i] = get_bits(gb, get_bits1(gb) ? 8 : 4); > + } > + > + return 0; > +} > + > +/* Table 7-9 */ > +static void skip_mp_param_set(DTSUHD *h, MD01 *md01, int nominal_flag) > +{ > + get_bits_md01(h, md01, 6); /* rLoudness */ > + if (nominal_flag == 0) > + get_bits_md01(h, md01, 5); > + > + get_bits_md01(h, md01, nominal_flag ? 
2 : 4); > +} > + > +/* Table 7-8 */ > +static int parse_static_md_params(DTSUHD *h, MD01 *md01, int only_first) > +{ > + int i; > + int loudness_sets = 1; > + int nominal_flag = 1; > + > + if (h->full_channel_mix_flag == 0) > + nominal_flag = get_bits_md01(h, md01, 1); > + > + if (nominal_flag) { > + if (h->full_channel_mix_flag == 0) > + loudness_sets = get_bits_md01(h, md01, 1) ? 3 : 1; > + } else { > + loudness_sets = get_bits_md01(h, md01, 4) + 1; > + } > + > + for (i = 0; i < loudness_sets; i++) > + skip_mp_param_set(h, md01, nominal_flag); > + > + if (only_first) > + return 0; > + > + if (nominal_flag == 0) > + get_bits_md01(h, md01, 1); > + > + for (i = 0; i < 3; i++) { /* Table 7-12 suggest 3 types */ > + if (get_bits_md01(h, md01, 1)) { > + if (get_bits_md01(h, md01, 4) == 15) /* Table 7-14 */ > + get_bits_md01(h, md01, 15); > + } > + if (get_bits_md01(h, md01, 1)) /* smooth md present */ > + get_bits_md01(h, md01, 6 * 6); > + } > + > + if (h->full_channel_mix_flag == 0) { > + i = md01->static_md_packets * md01->static_md_packet_size - > get_bits_count(&md01->gb); > + skip_bits(&md01->gb, i); > + } > + md01->static_md_extracted = 1; > + > + return 0; > +} > + > +/* Table 7-7 */ > +static int parse_multi_frame_md(DTSUHD *h, MD01 *md01) > +{ > + GetBitContext *gb = &h->gb; > + int i, n; > + static const uint8_t table1[4] = { 0, 6, 9, 12 }; > + static const uint8_t table2[4] = { 5, 7, 9, 11 }; > + > + if (h->is_sync_frame) { > + md01->packets_acquired = 0; > + if (h->full_channel_mix_flag) { > + md01->static_md_packets = 1; > + md01->static_md_packet_size = 0; > + } else { > + md01->static_md_packets = get_bits_var(gb, table1, 1) + 1; > + md01->static_md_packet_size = get_bits_var(gb, table2, 1) + 3; > + } > + > + n = md01->static_md_packets * md01->static_md_packet_size; > + if (n > md01->buf_bytes) { > + if (av_reallocp(&md01->buf, n)) > + return 1; > + md01->buf_bytes = n; > + } > + > + init_get_bits(&md01->gb, md01->buf, md01->buf_bytes * 8); > + if 
(md01->static_md_packets > 1) > + md01->static_md_update_flag = get_bits1(gb); > + else > + md01->static_md_update_flag = 1; > + } > + > + if (md01->packets_acquired < md01->static_md_packets) { > + n = md01->packets_acquired * md01->static_md_packet_size; > + for (i = 0; i < md01->static_md_packet_size; i++) > + md01->buf[n + i] = get_bits(gb, 8); > + md01->packets_acquired++; > + > + if (md01->packets_acquired == md01->static_md_packets) { > + if (md01->static_md_update_flag || !md01->static_md_extracted) > + if (parse_static_md_params(h, md01, 0)) > + return 1; > + } else if (md01->packets_acquired == 1) { > + if (md01->static_md_update_flag || !md01->static_md_extracted) > + if (parse_static_md_params(h, md01, 1)) > + return 1; > + } > + } > + > + return 0; > +} > + > +/* Return 1 if suitable, 0 if not. Table 7-18. OBJGROUPIDSTART=224 Sec > 7.8.7 p75 */ > +static int is_suitable_for_render(DTSUHD *h, MD01 *md01, int object_id) > +{ > + GetBitContext *gb = &h->gb; > + static const uint8_t table[4] = { 8, 10, 12, 14 }; > + > + if (object_id >= 224 || get_bits1(gb)) > + return 1; > + > + /* Reject the render and skip the render data. */ > + skip_bits1(gb); > + skip_bits(gb, get_bits_var(gb, table, 1)); > + > + return 0; > +} > + > +/* Table 7-26 */ > +static void parse_ch_mask_params(DTSUHD *h, MD01 *md01, MDObject *object) > +{ > + GetBitContext *gb = &h->gb; > + const int ch_index = object->rep_type == REP_TYPE_BINAURAL ? 
1 : > get_bits(gb, 4); > + static const int mask_table[14] = { /* Table 7-27 */ > + 0x000001, 0x000002, 0x000006, 0x00000F, 0x00001F, 0x00084B, > 0x00002F, > + 0x00802F, 0x00486B, 0x00886B, 0x03FBFB, 0x000003, 0x000007, > 0x000843, > + }; > + > + if (ch_index == 14) > + object->ch_activity_mask = get_bits(gb, 16); > + else if (ch_index == 15) > + object->ch_activity_mask = get_bits(gb, 32); > + else > + object->ch_activity_mask = mask_table[ch_index]; > +} > + > +/* Table 7-22 */ > +static int parse_object_metadata(DTSUHD *h, MD01 *md01, MDObject *object, > + int start_frame_flag, int object_id) > +{ > + GetBitContext *gb = &h->gb; > + int ch_mask_object_flag = 0; > + int object_3d_metadata_flag = 0; > + static const uint8_t table2[4] = { 1, 4, 4, 8 }; > + static const uint8_t table3[4] = { 3, 3, 4, 8 }; > + > + skip_bits(gb, object_id != 256); > + > + if (start_frame_flag) { > + object->rep_type = get_bits(gb, 3); > + switch (object->rep_type) { > + case REP_TYPE_BINAURAL: > + case REP_TYPE_CH_MASK_BASED: > + case REP_TYPE_MTRX2D_CH_MASK_BASED: > + case REP_TYPE_MTRX3D_CH_MASK_BASED: > + ch_mask_object_flag = 1; > + break; > + > + case REP_TYPE_3D_OBJECT_SINGLE_SRC_PER_WF: > + case REP_TYPE_3D_MONO_OBJECT_SINGLE_SRC_PER_WF: > + object_3d_metadata_flag = 1; > + break; > + } > + > + if (ch_mask_object_flag) { > + if (object_id != 256) { > + skip_bits(gb, 3); /* Object Importance Level */ > + if (get_bits1(gb)) > + skip_bits(gb, get_bits1(gb) ? 3 : 5); > + > + get_bits_var(gb, table2, 1); > + get_bits_var(gb, table3, 1); > + > + /* Skip optional Loudness block. */ > + if (get_bits1(gb)) > + skip_bits(gb, 8); > + > + /* Skip optional Object Interactive MD (Table 7-25). 
*/ > + if (get_bits1(gb) && h->interactive_obj_limits_present) { > + if (get_bits1(gb)) > + skip_bits(gb, 5 + 6 * object_3d_metadata_flag); > + } > + } > + > + parse_ch_mask_params(h, md01, object); > + } > + } > + > + /* Skip rest of object */ > + return 0; > +} > + > +/* Table 7-4 */ > +static int parse_md01(DTSUHD *h, MD01 *md01, int pres_index) > +{ > + GetBitContext *gb = &h->gb; > + uint32_t i; > + uint32_t id; > + uint32_t start_flag; > + > + if (h->audio[pres_index].selectable) { > + for (i = 0; i < 4; i++) /* Table 7-5. Scaling data. */ > + skip_bits(gb, 5 * get_bits1(gb)); > + > + if (get_bits1(gb) && parse_multi_frame_md(h, md01)) > + return 1; > + } > + > + /* Table 7-16: Object metadata. */ > + memset(md01->object, 0, sizeof(md01->object)); > + if (!h->full_channel_mix_flag) > + skip_bits(gb, 11 * get_bits1(gb)); > + > + for (i = 0; i < md01->object_list_count; i++) { > + id = md01->object_list[i]; > + if (!is_suitable_for_render(h, md01, id)) > + continue; > + > + md01->object[id].pres_index = pres_index; > + start_flag = 0; > + if (!md01->object[id].started) { > + skip_bits(gb, id != 256); > + start_flag = md01->object[id].started = 1; > + } > + > + if ((id < 224 || id > 255) && > + parse_object_metadata(h, md01, md01->object + id, start_flag, > id)) { > + return 1; > + } > + > + break; > + } > + > + return 0; > +} > + > +/* Table 6-2 */ > +static int parse_chunks(DTSUHD *h) > +{ > + GetBitContext *gb = &h->gb; > + MD01 *md01; > + int bit_next; > + int i; > + static const uint8_t table_aud_pres[4] = { 0, 2, 4, 4 }; > + int pres_index; > + uint32_t id; > + > + for (i = 0; i < h->chunk_count; i++) { > + bit_next = get_bits_count(gb) + h->chunk[i].bytes * 8; > + if (h->chunk[i].crc_flag && check_crc(h, get_bits_count(gb), > h->chunk[i].bytes)) > + return 1; > + > + id = get_bits(gb, 8); > + if (id == 1) { > + pres_index = get_bits_var(gb, table_aud_pres, 1); > + if (pres_index > 255) > + return 1; > + md01 = chunk_find_md01(h, id); > + if (md01 == NULL) 
> + md01 = chunk_append_md01(h, id); > + if (md01 == NULL) > + return 1; > + if (parse_md_chunk_list(h, md01)) > + return 1; > + if (parse_md01(h, md01, pres_index)) > + return 1; > + } > + > + skip_bits(gb, bit_next - get_bits_count(gb)); > + } > + > + return 0; > +} > + > +/** Allocate parsing handle. The parsing handle should be used to parse > + one DTS:X Profile 2 Audio stream, then freed by calling > DTSUHD_destroy(). > + Do not use the same parsing handle to parse multiple audio streams. > + > + @return Parsing handle for use with other functions, or NULL on failure. > +*/ > +DTSUHD *dtsuhd_create(void) > +{ > + return av_calloc(1, sizeof(DTSUHD)); > +} > + > +/** Free all resources used by the parsing handle. > + > + @param[in] h Handle allocated by dtshd_create > +*/ > +void dtsuhd_destroy(DTSUHD *h) > +{ > + if (h) { > + chunk_reset(h); > + av_freep(&h->chunk); > + av_freep(&h->navi); > + av_freep(&h); > + } > +} > + > +/** Parse a single DTS:X Profile 2 frame. > + The frame must start at the first byte of the data buffer, and enough > + of the frame must be present to decode the majority of the FTOC. > + From Table 6-11 p40. > + > + A sync frame must be the first frame provided, before any non-sync > frames. > + Signatures: sync=0x40411BF2, non-sync=0x71C442E8. 
> + > + @param[in] h Handle allocated by DTSUHD_create > + @param[in] First byte of a buffer containing the frame to parse > + @param[in] nData Number of valid bytes in 'data' > + @param[out] fi Results of frame parsing, may be NULL > + @param[out] di Results of descriptor parsing, may be NULL > + @return 0 on success, DTSUHDStatus enumeration on error > +*/ > +int dtsuhd_frame(DTSUHD *h, const uint8_t *data, size_t data_bytes, > + DTSUHDFrameInfo *fi, DTSUHDDescriptorInfo *di) > +{ > + GetBitContext *gb; > + int fraction = 1; > + int i; > + int syncword; > + static const uint8_t table_payload[4] = { 5, 8, 10, 12 }; > + > + if (!h || !data) > + return DTSUHD_NULL; > + > + if (data_bytes < 4) > + return DTSUHD_INCOMPLETE; /* Data buffer does not contain the > signature */ > + > + h->data = data; > + h->data_bytes = data_bytes; > + gb = &h->gb; > + init_get_bits(gb, data, data_bytes * 8); > init_get_bits8, and check return code. > + > + syncword = get_bits_long(gb, 32); > + h->is_sync_frame = syncword == DTSUHD_SYNCWORD; > + h->saw_sync |= h->is_sync_frame; > + if (!h->saw_sync || (!h->is_sync_frame && syncword != > DTSUHD_NONSYNCWORD)) > + return DTSUHD_NOSYNC; /* Invalid frame or have not parsed sync > frame. */ > + > + h->ftoc_bytes = get_bits_var(gb, table_payload, 1) + 1; > + if (h->ftoc_bytes < 5 || h->ftoc_bytes >= data_bytes) > + return DTSUHD_INCOMPLETE; /* Data buffer does not contain entire > FTOC */ > + > + if (parse_stream_params(h)) > + return DTSUHD_INVALID_FRAME; > + > + if (parse_aud_pres_params(h)) > + return DTSUHD_INVALID_FRAME; > + > + if (parse_chunk_navi(h)) /* AudioChunkTypes and payload sizes. */ > + return DTSUHD_INVALID_FRAME; > + > + /* At this point in the parsing, we can calculate the size of the > frame. 
*/ > + h->frame_bytes = h->ftoc_bytes + h->chunk_bytes; > + if (h->frame_bytes > data_bytes) > + return DTSUHD_INCOMPLETE; > + > + if (di && h->is_sync_frame) { > + /* Skip PBRSmoothParams (Table 6-26) and align to the chunks > immediately > + following the FTOC CRC. > + */ > + skip_bits(gb, h->ftoc_bytes * 8 - get_bits_count(gb)); > + if (parse_chunks(h)) > + return DTSUHD_INVALID_FRAME; > + update_descriptor(h, di); > + } > + > + /* 6.3.6.9: audio frame duration may be a fraction of metadata frame > duration. */ > + for (i = 0; i < h->navi_count; i++) { > + if (h->navi[i].present) { > + if (h->navi[i].id == 3) > + fraction = 2; > + else if (h->navi[i].id == 4) > + fraction = 4; > + } > + } > + > + if (fi) { > + fi->sync = h->is_sync_frame; > + fi->frame_bytes = h->frame_bytes; > + fi->sample_rate = h->sample_rate; > + fi->sample_count = (h->frame_duration * fi->sample_rate) / > (h->clock_rate * fraction); > + fi->duration = (double)fi->sample_count / fi->sample_rate; > Please no double type. Also make use of av_rescale. > + } > + > + return DTSUHD_OK; > +} > + > +/** Return the offset of the first UHD audio frame. > + When supplied a buffer containing DTSHDHDR file content, the DTSHD > + headers are skipped and the offset to the first byte of the STRMDATA > + chunk is returned, along with the size of that chunk. 
> + > + @param[in] dataStart DTS:X Profile 2 file content to parse > + @param[in] dataSize Number of valid bytes in 'dataStart' > + @param[out] Number of leading DTS:X Profile 2 audio frames to discard, > + may be NULL > + @param[out] Size of STRMDATA payload, may be NULL > + @return STRMDATA payload offset or 0 if not a valid DTS:X Profile 2 file > +*/ > +int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, > size_t *strmdata_size) > +{ > + const uint8_t *data = data_start; > + const uint8_t *data_end = data + data_size; > + uint64_t chunk_size = 0; > + > + if (data + DTSUHD_CHUNK_HEADER >= data_end || memcmp(data, > "DTSHDHDR", 8)) > + return 0; > + > + for (; data + DTSUHD_CHUNK_HEADER + 4 <= data_end; data += chunk_size > + DTSUHD_CHUNK_HEADER) { > + chunk_size = AV_RB64(data + 8); > + > + if (!memcmp(data, "STRMDATA", 8)) { > + if (strmdata_size) > + *strmdata_size = chunk_size; > + return (int)(data - data_start) + DTSUHD_CHUNK_HEADER; > + } > + } > + > + return 0; > +} > diff --git a/libavcodec/dtsuhd_common.h b/libavcodec/dtsuhd_common.h > new file mode 100644 > index 0000000000..8b4e8ce2aa > --- /dev/null > +++ b/libavcodec/dtsuhd_common.h > @@ -0,0 +1,84 @@ > +/* > + * DTS-UHD common audio frame parsing code > + * Copyright (c) 2023 Xperi Corporation / DTS, Inc. > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +#ifndef AVCODEC_DTSUHD_COMMON_H > +#define AVCODEC_DTSUHD_COMMON_H > + > +#include <stdint.h> > +#include <stdlib.h> > + > +#define DTSUHD_NONSYNCWORD 0x71C442E8 > +#define DTSUHD_SYNCWORD 0x40411BF2 > + > +#define DTSUHD_MAX_FRAME_SIZE 0x1000 > + > +/* Return codes from dtsuhd_frame */ > +enum DTSUHDStatus { > + DTSUHD_OK, > + DTSUHD_INCOMPLETE, /* Entire frame not in buffer. */ > + DTSUHD_INVALID_FRAME, /* Error parsing frame. */ > + DTSUHD_NOSYNC, /* No sync frame prior to non-sync frame. */ > + DTSUHD_NULL, /* Function parameter may not be NULL. */ > +}; > + > +/* Return stream information from an audio frame parsed by dtsuhd_frame, > */ > +typedef struct DTSUHDDescriptorInfo { > + unsigned valid:1; /* True if descriptor info is valid. */ > + char coding_name[5]; /* Four character, null term SampleEntry box > name. */ > + int base_sample_freq_code; > + int channel_count; > + int decoder_profile_code; > + int frame_duration_code; > + int max_payload_code; > + int num_pres_code; > + int rep_type; > + int sample_rate; > + int sample_rate_mod; > + int sample_size; > + int channel_mask; > + uint64_t ffmpeg_channel_mask; > +} DTSUHDDescriptorInfo; > + > +/* Return frame information from an audio frame parsed by dtsuhd_frame. */ > +typedef struct DTSUHDFrameInfo { > + double duration; /* Duration of frame in seconds (seconds per > frame). */ > + int frame_bytes; /* Size of entire frame in bytes. */ > + int sample_count; /* Number of samples in frame (samples per frame). > */ > + int sample_rate; /* Sample rate of frame (samples per second). */ > + unsigned sync:1; /* True if frame is a sync frame. 
*/ > +} DTSUHDFrameInfo; > + > +struct DTSUHD; > +typedef struct DTSUHD DTSUHD; > + > +struct DTSUHD *dtsuhd_create(void); > +void dtsuhd_destroy(DTSUHD*); > +int dtsuhd_frame(DTSUHD*, const uint8_t *data, size_t nData, > + DTSUHDFrameInfo*, DTSUHDDescriptorInfo*); > +int dtsuhd_strmdata_payload(const uint8_t *data_start, int data_size, > + size_t *strmdata_size); > + > +static inline int dtsuhd_is_syncword(uint32_t syncword) > +{ > + return syncword == DTSUHD_NONSYNCWORD || syncword == DTSUHD_SYNCWORD; > +} > + > +#endif /* AVCODEC_DTSUHD_COMMON_H */ > diff --git a/libavcodec/dtsuhd_parser.c b/libavcodec/dtsuhd_parser.c > new file mode 100644 > index 0000000000..4c553b8e4f > --- /dev/null > +++ b/libavcodec/dtsuhd_parser.c > @@ -0,0 +1,141 @@ > +/* > + * DTS-UHD audio frame parsing code > + * Copyright (c) 2023 Xperi Corporation / DTS, Inc. > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +/** > + * @file > + * Parse raw DTS-UHD audio frame input and return individual audio frames. 
> + */ > + > +#include "dtsuhd_common.h" > +#include "libavutil/intreadwrite.h" > +#include "parser.h" > + > +#define DTSUHD_BUFFER_SIZE (DTSUHD_MAX_FRAME_SIZE * 128) > + > +typedef struct DTSUHDParseContext { > + DTSUHD *dtsuhd; > + int buf_offset; > + int buf_bytes; > + int frame_bytes; > + uint8_t *buf; > +} DTSUHDParseContext; > + > +static av_cold int parser_init(AVCodecParserContext *s) > +{ > + DTSUHDParseContext *pc = s->priv_data; > + > + pc->dtsuhd = dtsuhd_create(); > + pc->buf = av_calloc(DTSUHD_BUFFER_SIZE + > AV_INPUT_BUFFER_PADDING_SIZE, 1); > + if (!pc->dtsuhd || !pc->buf) > + return AVERROR(ENOMEM); > + > + return 0; > +} > + > +static void parser_close(AVCodecParserContext *s) > +{ > + DTSUHDParseContext *pc = s->priv_data; > + > + dtsuhd_destroy(pc->dtsuhd); > + pc->dtsuhd = NULL; > + av_freep(&pc->buf); > + ff_parse_close(s); > +} > + > +// Keep data in contiguous buffer as required by dtsuhd_frame. > +static int append_buffer(DTSUHDParseContext *pc, const uint8_t **buf, int > *buf_size, int *input_consumed) > +{ > + int copy_bytes; > + > + pc->buf_offset += pc->frame_bytes; > + pc->frame_bytes = 0; > + > + // Buffer almost full, move partial frame to start of buffer for more > space. > + if (*buf_size > 0 && pc->buf_bytes + *buf_size > DTSUHD_BUFFER_SIZE) { > + memmove(pc->buf, pc->buf + pc->buf_offset, pc->buf_bytes); > + pc->buf_bytes -= pc->buf_offset; > + pc->buf_offset = 0; > + } > + > + copy_bytes = FFMAX(0, FFMIN(DTSUHD_BUFFER_SIZE - pc->buf_bytes, > *buf_size)); > + > + // Append input buffer to our context. 
> + if (copy_bytes) { > + memcpy(pc->buf + pc->buf_bytes, *buf, copy_bytes); > + pc->buf_bytes += copy_bytes; > + } > + > + // Ensure buffer starts with a syncword > + while (pc->buf_offset + 4 < pc->buf_bytes && > !dtsuhd_is_syncword(AV_RB32(pc->buf + pc->buf_offset))) > + pc->buf_offset++; > + > + *input_consumed = copy_bytes; > + *buf = pc->buf + pc->buf_offset; > + *buf_size = pc->buf_bytes - pc->buf_offset; > + > + return copy_bytes && pc->buf_bytes - pc->buf_offset < > DTSUHD_MAX_FRAME_SIZE; > +} > + > +static int parser_parse(AVCodecParserContext *s, AVCodecContext *avctx, > + const uint8_t **poutbuf, int *poutbuf_size, > + const uint8_t *buf, int buf_size) > +{ > + DTSUHDParseContext *pc = s->priv_data; > + DTSUHDFrameInfo fi; > + int input_consumed = 0; > + > + if (append_buffer(pc, &buf, &buf_size, &input_consumed)) { > + *poutbuf = NULL; > + *poutbuf_size = 0; > + return input_consumed; > + } > + > + switch (dtsuhd_frame(pc->dtsuhd, buf, buf_size, &fi, NULL)) { > + case DTSUHD_OK: > + if (fi.sample_count) > + s->duration = fi.sample_count; > + if (fi.sample_rate) > + avctx->sample_rate = fi.sample_rate; > + buf_size = pc->frame_bytes = fi.frame_bytes; > + break; > + case DTSUHD_INCOMPLETE: > + pc->frame_bytes = buf_size; > + buf = NULL; > + buf_size = 0; > + break; > + default: > + av_log(avctx, AV_LOG_ERROR, "Unable to process DTS-UHD file. 
File > may be invalid.\n"); > + return AVERROR_INVALIDDATA; > + } > + > + *poutbuf = buf; > + *poutbuf_size = buf_size; > + > + return input_consumed; > +} > + > +AVCodecParser ff_dtsuhd_parser = { > + .codec_ids = { AV_CODEC_ID_DTSUHD }, > + .priv_data_size = sizeof(DTSUHDParseContext), > + .parser_init = parser_init, > + .parser_parse = parser_parse, > + .parser_close = parser_close, > +}; > diff --git a/libavcodec/parsers.c b/libavcodec/parsers.c > index d355808018..d724c8b402 100644 > --- a/libavcodec/parsers.c > +++ b/libavcodec/parsers.c > @@ -37,6 +37,7 @@ extern const AVCodecParser ff_dirac_parser; > extern const AVCodecParser ff_dnxhd_parser; > extern const AVCodecParser ff_dolby_e_parser; > extern const AVCodecParser ff_dpx_parser; > +extern const AVCodecParser ff_dtsuhd_parser; > extern const AVCodecParser ff_dvaudio_parser; > extern const AVCodecParser ff_dvbsub_parser; > extern const AVCodecParser ff_dvdsub_parser; > diff --git a/libavformat/Makefile b/libavformat/Makefile > index 048649689b..42cf19348f 100644 > --- a/libavformat/Makefile > +++ b/libavformat/Makefile > @@ -186,6 +186,7 @@ OBJS-$(CONFIG_DSICIN_DEMUXER) += dsicin.o > OBJS-$(CONFIG_DSS_DEMUXER) += dss.o > OBJS-$(CONFIG_DTSHD_DEMUXER) += dtshddec.o > OBJS-$(CONFIG_DTS_DEMUXER) += dtsdec.o rawdec.o > +OBJS-$(CONFIG_DTSUHD_DEMUXER) += dtsuhddec.o > OBJS-$(CONFIG_DTS_MUXER) += rawenc.o > OBJS-$(CONFIG_DV_MUXER) += dvenc.o > OBJS-$(CONFIG_DVBSUB_DEMUXER) += dvbsub.o rawdec.o > diff --git a/libavformat/allformats.c b/libavformat/allformats.c > index cb5b69e9cd..1b48ce6073 100644 > --- a/libavformat/allformats.c > +++ b/libavformat/allformats.c > @@ -144,6 +144,7 @@ extern const AVInputFormat ff_dss_demuxer; > extern const AVInputFormat ff_dts_demuxer; > extern const FFOutputFormat ff_dts_muxer; > extern const AVInputFormat ff_dtshd_demuxer; > +extern const AVInputFormat ff_dtsuhd_demuxer; > extern const AVInputFormat ff_dv_demuxer; > extern const FFOutputFormat ff_dv_muxer; > extern const 
AVInputFormat ff_dvbsub_demuxer; > diff --git a/libavformat/dtshddec.c b/libavformat/dtshddec.c > index a3dea0668f..6e9e78a335 100644 > --- a/libavformat/dtshddec.c > +++ b/libavformat/dtshddec.c > @@ -46,7 +46,7 @@ typedef struct DTSHDDemuxContext { > static int dtshd_probe(const AVProbeData *p) > { > if (AV_RB64(p->buf) == DTSHDHDR) > - return AVPROBE_SCORE_MAX; > + return AVPROBE_SCORE_MAX - 4; // DTSUHD (.dtsx) files also have > this signature. > return 0; > } > diff --git a/libavformat/dtsuhddec.c b/libavformat/dtsuhddec.c > new file mode 100644 > index 0000000000..e15176382d > --- /dev/null > +++ b/libavformat/dtsuhddec.c > @@ -0,0 +1,214 @@ > +/* > + * DTS-UHD audio demuxer > + * Copyright (c) 2023 Xperi Corporation / DTS, Inc. > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +/** > + * @file > + * Report DTS-UHD audio stream configuration and extract raw packet data. 
> + */ > + > +#include "internal.h" > +#include "libavcodec/dtsuhd_common.h" > +#include "libavcodec/put_bits.h" > +#include "libavutil/intreadwrite.h" > + > +#define DTSUHD_BUFFER_SIZE (1024 * 1024) > + > +typedef struct DTSUHDDemuxContext { > + size_t data_end; > + struct DTSUHD *dtsuhd; > + uint8_t *buf; > +} DTSUHDDemuxContext; > + > +static int probe(const AVProbeData *p) > +{ > + int offset = dtsuhd_strmdata_payload(p->buf, p->buf_size, NULL); > + int score = 0; > + struct DTSUHD *h = dtsuhd_create(); > + > + for (; offset + 4 < p->buf_size; offset++) { > + if (dtsuhd_is_syncword(AV_RB32(p->buf + offset))) { > + if (DTSUHD_OK == dtsuhd_frame(h, p->buf + offset, p->buf_size > - offset, NULL, NULL)) { > + score = AVPROBE_SCORE_MAX - 3; > + break; > + } > + } > + } > + > + dtsuhd_destroy(h); > + return score; > +} > + > +static av_cold int read_close(AVFormatContext *s) > +{ > + DTSUHDDemuxContext *dtsxs = s->priv_data; > + > + av_freep(&dtsxs->buf); > + dtsuhd_destroy(dtsxs->dtsuhd); > + dtsxs->dtsuhd = NULL; > + > + return 0; > +} > + > +static int find_first_syncword(DTSUHDDemuxContext *dtsuhd, int data_start) > +{ > + while (data_start + 4 < DTSUHD_BUFFER_SIZE && > + !dtsuhd_is_syncword(AV_RB32(dtsuhd->buf + data_start))) { > + data_start++; > + } > + > + return data_start; > +} > + > +static int write_extradata(AVCodecParameters *par, DTSUHDDescriptorInfo > *di) > +{ > + PutBitContext pbc; > + int ret; > + int size; > + uint8_t udts[32]; > + > + init_put_bits(&pbc, udts, sizeof(udts)); > + put_bits32(&pbc, 0); // udts box size > + put_bits(&pbc, 8, 'u'); // udts box signature > + put_bits(&pbc, 8, 'd'); > + put_bits(&pbc, 8, 't'); > + put_bits(&pbc, 8, 's'); > + put_bits(&pbc, 6, di->decoder_profile_code); > + put_bits(&pbc, 2, di->frame_duration_code); > + put_bits(&pbc, 3, di->max_payload_code); > + put_bits(&pbc, 5, di->num_pres_code); > + put_bits32(&pbc, di->channel_mask); > + put_bits(&pbc, 1, di->base_sample_freq_code); > + put_bits(&pbc, 2, 
di->sample_rate_mod); > + put_bits(&pbc, 3, di->rep_type); > + put_bits(&pbc, 3, 0); > + put_bits(&pbc, 1, 0); > + put_bits64(&pbc, di->num_pres_code + 1, 0); // ID Tag present for > each presentation. > + flush_put_bits(&pbc); // byte align > + size = put_bits_count(&pbc) >> 3; > + AV_WB32(udts, size); > + > + ret = ff_alloc_extradata(par, size); > + if (ret < 0) > + return ret; > + > + memcpy(par->extradata, udts, size); > + > + return 0; > +} > + > +static int read_header(AVFormatContext *s) > +{ > + AVIOContext *pb = s->pb; > + AVStream *st = avformat_new_stream(s, NULL); > + DTSUHDDemuxContext *dtsuhd = s->priv_data; > + DTSUHDDescriptorInfo di; > + DTSUHDFrameInfo fi; > + int buf_bytes; > + int ret = DTSUHD_INVALID_FRAME; > + int data_start; > + > + if (!(pb->seekable & AVIO_SEEKABLE_NORMAL)) > + return AVERROR(EIO); > + > + dtsuhd->buf = av_malloc(DTSUHD_BUFFER_SIZE); > + dtsuhd->dtsuhd = dtsuhd_create(); > + if (!dtsuhd->buf || !dtsuhd->dtsuhd || !st) > + return AVERROR(ENOMEM); > + > + buf_bytes = avio_read(pb, dtsuhd->buf, DTSUHD_BUFFER_SIZE); > + if (buf_bytes < 0) > + return buf_bytes; > + > + data_start = dtsuhd_strmdata_payload(dtsuhd->buf, buf_bytes, > &dtsuhd->data_end); > + dtsuhd->data_end += data_start; > + if (data_start == 0) > + dtsuhd->data_end = avio_size(pb); // Not a DTSHDHDR chunk file, > decode frames to end of file. > + > + data_start = find_first_syncword(dtsuhd, data_start); > + if (avio_seek(pb, data_start, SEEK_SET) < 0) > + return AVERROR(EINVAL); > + > + ret = dtsuhd_frame(dtsuhd->dtsuhd, dtsuhd->buf + data_start, > + buf_bytes - data_start, &fi, &di); > + if (ret != DTSUHD_OK || !di.valid) { > + av_log(s, AV_LOG_ERROR, "Unable to process DTS-UHD file. 
File may > be invalid.\n"); > + return AVERROR_INVALIDDATA; > + } > + > + ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW; > + st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; > + st->codecpar->codec_id = s->iformat->raw_codec_id; > + st->codecpar->ch_layout.order = AV_CHANNEL_ORDER_NATIVE; > + st->codecpar->ch_layout.nb_channels = di.channel_count; > + st->codecpar->ch_layout.u.mask = di.ffmpeg_channel_mask; > + st->codecpar->codec_tag = AV_RL32(di.coding_name); > + st->codecpar->frame_size = 512 << di.frame_duration_code; > + st->codecpar->sample_rate = di.sample_rate; > + > +#if FF_API_OLD_CHANNEL_LAYOUT > +FF_DISABLE_DEPRECATION_WARNINGS > + st->codecpar->channels = di.channel_count; > + st->codecpar->channel_layout = di.ffmpeg_channel_mask; > +FF_ENABLE_DEPRECATION_WARNINGS > +#endif > + > + ret = write_extradata(st->codecpar, &di); > + if (ret < 0) > + return ret; > + > + if (st->codecpar->sample_rate) > + avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate); > + > + return 0; > +} > + > +static int read_packet(AVFormatContext *s, AVPacket *pkt) > +{ > + DTSUHDDemuxContext *dtsuhd = s->priv_data; > + int64_t size, left; > + int ret; > + > + left = dtsuhd->data_end - avio_tell(s->pb); > + size = FFMIN(left, DTSUHD_MAX_FRAME_SIZE); > + if (size <= 0) > + return AVERROR_EOF; > + > + ret = av_get_packet(s->pb, pkt, size); > + if (ret < 0) > + return ret; > + > + pkt->stream_index = 0; > + > + return ret; > +} > + > +AVInputFormat ff_dtsuhd_demuxer = { > + .name = "dtsuhd", > + .long_name = NULL_IF_CONFIG_SMALL("DTS-UHD"), > + .priv_data_size = sizeof(DTSUHDDemuxContext), > + .read_probe = probe, > + .read_header = read_header, > + .read_packet = read_packet, > + .read_close = read_close, > + .flags = AVFMT_GENERIC_INDEX, > + .extensions = "dtsx", > + .raw_codec_id = AV_CODEC_ID_DTSUHD, > +}; > diff --git a/libavformat/movenc.c b/libavformat/movenc.c > index c370922c7d..e727407694 100644 > --- a/libavformat/movenc.c > +++ b/libavformat/movenc.c > @@ -762,6 
+762,24 @@ static int mov_write_esds_tag(AVIOContext *pb, > MOVTrack *track) // Basic > return update_size(pb, pos); > } > +static int mov_write_udts_tag(AVIOContext *pb, MOVTrack *track) > +{ > + if (track->vos_len < 12) { > + av_log(pb, AV_LOG_ERROR, > + "Cannot write moov atom before DTS-UHD packets." > + " Set the delay_moov flag to fix this.\n"); > + return AVERROR(EINVAL); > + } > + > + /* Write vos_data if it is a udts box. */ > + if (memcmp(track->vos_data + 4, "udts", 4) == 0) { > + avio_write(pb, track->vos_data, track->vos_len); > + return track->vos_len; > + } > + > + return 0; > +} > + > static int mov_pcm_le_gt16(enum AVCodecID codec_id) > { > return codec_id == AV_CODEC_ID_PCM_S24LE || > @@ -1367,6 +1385,8 @@ static int mov_write_audio_tag(AVFormatContext *s, > AVIOContext *pb, MOVMuxContex > ret = mov_write_dops_tag(s, pb, track); > else if (track->par->codec_id == AV_CODEC_ID_TRUEHD) > ret = mov_write_dmlp_tag(s, pb, track); > + else if (track->par->codec_id == AV_CODEC_ID_DTSUHD) > + ret = mov_write_udts_tag(pb, track); > else if (tag == MOV_MP4_IPCM_TAG || tag == MOV_MP4_FPCM_TAG) { > if (track->par->ch_layout.nb_channels > 1) > ret = mov_write_chnl_tag(s, pb, track); > @@ -2781,6 +2801,7 @@ static int mov_write_stbl_tag(AVFormatContext *s, > AVIOContext *pb, MOVMuxContext > if ((track->par->codec_type == AVMEDIA_TYPE_VIDEO || > track->par->codec_id == AV_CODEC_ID_TRUEHD || > track->par->codec_id == AV_CODEC_ID_MPEGH_3D_AUDIO || > + track->par->codec_id == AV_CODEC_ID_DTSUHD || > track->par->codec_tag == MKTAG('r','t','p',' ')) && > track->has_keyframes && track->has_keyframes < track->entry) > mov_write_stss_tag(pb, track, MOV_SYNC_SAMPLE); > @@ -5673,6 +5694,14 @@ static void mov_parse_vc1_frame(AVPacket *pkt, > MOVTrack *trk) > } > } > +static void mov_parse_dtsuhd_frame(AVPacket *pkt, MOVTrack *trk) > +{ > + if (pkt->size > 4 && AV_RB32(pkt->data) == 0x40411BF2) { > + trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE; > + trk->has_keyframes++; > + } 
> + } > + > static void mov_parse_truehd_frame(AVPacket *pkt, MOVTrack *trk) > { > int length; > @@ -6343,6 +6372,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket > *pkt) > mov_parse_vc1_frame(pkt, trk); > } else if (par->codec_id == AV_CODEC_ID_TRUEHD) { > mov_parse_truehd_frame(pkt, trk); > + } else if (par->codec_id == AV_CODEC_ID_DTSUHD) { > + mov_parse_dtsuhd_frame(pkt, trk); > } else if (pkt->flags & AV_PKT_FLAG_KEY) { > if (mov->mode == MODE_MOV && par->codec_id == > AV_CODEC_ID_MPEG2VIDEO && > trk->entry > 0) { // force sync sample for the first key > frame > @@ -7800,6 +7831,7 @@ static const AVCodecTag codec_mp4_tags[] = { > { AV_CODEC_ID_AC3, MKTAG('a', 'c', '-', '3') }, > { AV_CODEC_ID_EAC3, MKTAG('e', 'c', '-', '3') }, > { AV_CODEC_ID_DTS, MKTAG('m', 'p', '4', 'a') }, > + { AV_CODEC_ID_DTSUHD, MKTAG('d', 't', 's', 'x') }, > { AV_CODEC_ID_TRUEHD, MKTAG('m', 'l', 'p', 'a') }, > { AV_CODEC_ID_FLAC, MKTAG('f', 'L', 'a', 'C') }, > { AV_CODEC_ID_OPUS, MKTAG('O', 'p', 'u', 's') }, > diff --git a/libavformat/version.h b/libavformat/version.h > index cc56b7cf5c..384cbd49cc 100644 > --- a/libavformat/version.h > +++ b/libavformat/version.h > @@ -31,7 +31,7 @@ > #include "version_major.h" > -#define LIBAVFORMAT_VERSION_MINOR 4 > +#define LIBAVFORMAT_VERSION_MINOR 5 > #define LIBAVFORMAT_VERSION_MICRO 101 > #define LIBAVFORMAT_VERSION_INT > AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \ > -- > 2.17.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".