https://github.com/michaelselehov updated https://github.com/llvm/llvm-project/pull/205587
>From a18f889b03cca78e8aaaa3625ca9575dc795b3d3 Mon Sep 17 00:00:00 2001 From: mselehov <[email protected]> Date: Wed, 24 Jun 2026 10:59:41 -0500 Subject: [PATCH 1/3] [OffloadBundler] Bound compressed bundles by header size, not magic scan The compressed offload bundle (CCOB) readers located the boundary between concatenated bundles by scanning for the literal 4-byte magic "CCOB". Those bytes can appear by chance inside a compressed payload, so a single valid bundle could be truncated at the spurious magic and then fail to decompress with "Src size is incorrect". At runtime this surfaced as hipErrorInvalidImage ("device kernel image is invalid") when loading affected HIP code objects. Use the authoritative FileSize recorded in the compressed bundle header (CompressedBundleHeader::tryParse, present for V2/V3) to delimit the current bundle, and search for the next bundle's "CCOB" magic only past that point. This keeps multi-bundle iteration working (and tolerant of inter-bundle padding) while ignoring magic-byte collisions inside the payload. Bundles without a recorded size (legacy V1) fall back to the previous magic scan. Fixed in all three readers: clang's ListBundleIDsInFile and UnbundleFiles, and Object's extractOffloadBundle. The regression test embeds the bytes "CCOB" inside a zstd skippable frame in a single compressed bundle's payload: the decompressor ignores the frame, but the old magic scan stopped at it. --- clang/lib/Driver/OffloadBundler.cpp | 41 +++++++- .../clang-offload-bundler-magic-collision.co | Bin 0 -> 157 bytes .../clang-offload-bundler-magic-collision.py | 91 ++++++++++++++++++ .../clang-offload-bundler-magic-collision.c | 27 ++++++ llvm/lib/Object/OffloadBundle.cpp | 33 ++++++- 5 files changed, 183 insertions(+), 9 deletions(-) create mode 100644 clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co create mode 100644 clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py create mode 100644 clang/test/Driver/clang-offload-bundler-magic-collision.c diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp index 9d6f32c4a4e8f..70b5668c35a30 100644 --- a/clang/lib/Driver/OffloadBundler.cpp +++ b/clang/lib/Driver/OffloadBundler.cpp @@ -1343,6 +1343,18 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input, llvm::toStringRef(DecompressedData)); } +// Returns the on-disk size recorded in the compressed offload bundle header at +// the start of \p Blob, or std::nullopt if the header carries no size field. +static std::optional<size_t> getCompressedBundleSize(StringRef Blob) { + Expected<CompressedOffloadBundle::CompressedBundleHeader> HeaderOrErr = + CompressedOffloadBundle::CompressedBundleHeader::tryParse(Blob); + if (!HeaderOrErr) { + consumeError(HeaderOrErr.takeError()); + return std::nullopt; + } + return HeaderOrErr->FileSize; +} + // List bundle IDs. Return true if an error was found. Error OffloadBundler::ListBundleIDsInFile( StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) { @@ -1364,15 +1376,25 @@ Error OffloadBundler::ListBundleIDsInFile( (**Contents).getBuffer().drop_front(Offset), "", /*RequiresNullTerminator=*/false); + size_t CurBundleEnd = StringRef::npos; if (identify_magic((*Buffer).getBuffer()) == file_magic::offload_bundle_compressed) { - NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4); + // Locate this bundle's end and the next bundle from the header size. + if (std::optional<size_t> Size = + getCompressedBundleSize((*Buffer).getBuffer())) { + CurBundleEnd = *Size; + NextBundleStart = (*Buffer).getBuffer().find("CCOB", *Size); + } else { + // Legacy bundle without a recorded size: fall back to magic scanning. + NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4); + CurBundleEnd = NextBundleStart; + } } else NextBundleStart = StringRef::npos; ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr = MemoryBuffer::getMemBuffer( - (*Buffer).getBuffer().take_front(NextBundleStart), + (*Buffer).getBuffer().take_front(CurBundleEnd), InputFileName, // FileName, false); if (std::error_code EC = CodeOrErr.getError()) @@ -1613,19 +1635,30 @@ Error OffloadBundler::UnbundleFiles() { (**CodeOrErr).getBuffer().drop_front(Offset), "", /*RequiresNullTerminator=*/false); + size_t CurBundleEnd = StringRef::npos; if (identify_magic((*Buffer).getBuffer()) == file_magic::offload_bundle_compressed) { - NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4); + // Locate this bundle's end and the next bundle from the header size. + if (std::optional<size_t> Size = + getCompressedBundleSize((*Buffer).getBuffer())) { + CurBundleEnd = *Size; + NextBundleStart = (*Buffer).getBuffer().find("CCOB", *Size); + } else { + // Legacy bundle without a recorded size: fall back to magic scanning. + NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4); + CurBundleEnd = NextBundleStart; + } } else if (identify_magic((*Buffer).getBuffer()) == file_magic::offload_bundle) { NextBundleStart = (*Buffer).getBuffer().find( OFFLOAD_BUNDLER_MAGIC_STR, sizeof(OFFLOAD_BUNDLER_MAGIC_STR)); + CurBundleEnd = NextBundleStart; } else NextBundleStart = StringRef::npos; ErrorOr<std::unique_ptr<MemoryBuffer>> BlobOrErr = MemoryBuffer::getMemBuffer( - (*Buffer).getBuffer().take_front(NextBundleStart), + (*Buffer).getBuffer().take_front(CurBundleEnd), BundlerConfig.InputFileNames.front(), /*RequiresNullTerminator=*/false); if (std::error_code EC = BlobOrErr.getError()) diff --git a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co new file mode 100644 index 0000000000000000000000000000000000000000..4a7c7e90ab0849aa2fa5c17b770598d0cda349f7 GIT binary patch literal 157 zcmZ>E_IF}tU}Tuf00DcT^pQjSwvntG>;klWC0KxB&Omh<TlN1c>~UpgP+^UaclL4g zbC36TbMx_cbcuHg^>guYjgMzy=wgs$kY&ioEYMBNO-WDA%P3CN)lE;Uurx4RY~h@r xSCX1nqL81akdj)KnVhPSmYI{PV8~^}#li4t4f_VGTW^CRWfOSV*qE&w3;~)|Drx`# literal 0 HcmV?d00001 diff --git a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py new file mode 100644 index 0000000000000..07d3198165dd8 --- /dev/null +++ b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +"""Regenerate clang-offload-bundler-magic-collision.co. + +This input reproduces a magic-byte collision in the compressed offload bundle +(CCOB) reader. The reader used to find the boundary between concatenated +bundles by scanning for the literal 4-byte magic "CCOB"; because that magic can +appear by chance inside a compressed payload, the reader could truncate a valid +single bundle and then fail to decompress it. + +To make the collision deterministic we take a real compressed bundle produced +by clang-offload-bundler and splice a zstd *skippable frame* whose body is the +bytes "CCOB" into the compressed region. A zstd skippable frame is ignored by +the decompressor, so the bundle still decompresses correctly, but a naive +find("CCOB", 4) scan stops at the planted bytes. + +The bundle's header FileSize is updated so it remains authoritative. A reader +that advances by FileSize handles this file correctly; a reader that scans for +"CCOB" truncates and fails. + +Usage: + clang-offload-bundler-magic-collision.py <clang-offload-bundler> [output.co] +""" + +import os +import struct +import subprocess +import sys +import tempfile + +# zstd skippable frame magic numbers are 0x184D2A50..0x184D2A5F. +ZSTD_SKIPPABLE_MAGIC = 0x184D2A50 +PLANTED_MAGIC = b"CCOB" + + +def main(): + if len(sys.argv) < 2: + sys.exit("usage: %s <clang-offload-bundler> [output.co]" % sys.argv[0]) + bundler = sys.argv[1] + out = sys.argv[2] if len(sys.argv) > 2 else \ + os.path.join(os.path.dirname(os.path.abspath(__file__)), + "clang-offload-bundler-magic-collision.co") + + with tempfile.TemporaryDirectory() as d: + dev1 = os.path.join(d, "dev1") + dev2 = os.path.join(d, "dev2") + with open(dev1, "wb") as f: + f.write(b"Content of device file 1\n") + with open(dev2, "wb") as f: + f.write(b"Content of device file 2\n") + base = os.path.join(d, "base.co") + subprocess.run( + [bundler, "-compress", "-type=bc", + "-targets=hip-amdgcn-amd-amdhsa--gfx906," + "hip-amdgcn-amd-amdhsa--gfx908", + "-input=" + dev1, "-input=" + dev2, "-output=" + base], + check=True) + data = bytearray(open(base, "rb").read()) + + if data[:4] != b"CCOB": + sys.exit("unexpected magic in compressed bundle") + version = struct.unpack_from("<H", data, 4)[0] + if version == 2: + header_size, fs_off, fs_fmt = 24, 8, "<I" + elif version == 3: + header_size, fs_off, fs_fmt = 32, 8, "<Q" + else: + sys.exit("unsupported compressed bundle version %d" % version) + + file_size = struct.unpack_from(fs_fmt, data, fs_off)[0] + if file_size != len(data): + sys.exit("FileSize %d != file length %d" % (file_size, len(data))) + + # Splice a zstd skippable frame carrying "CCOB" right after the header, so + # the planted magic lands inside the compressed region. + skip = struct.pack("<II", ZSTD_SKIPPABLE_MAGIC, len(PLANTED_MAGIC)) \ + + PLANTED_MAGIC + data = data[:header_size] + skip + data[header_size:] + struct.pack_into(fs_fmt, data, fs_off, file_size + len(skip)) + + planted = bytes(data).find(PLANTED_MAGIC, 4) + if planted != header_size + 8: + sys.exit("planted magic at unexpected offset %d" % planted) + + with open(out, "wb") as f: + f.write(data) + print("wrote %s: version=%d header=%d FileSize=%d planted 'CCOB' at %d" % + (out, version, header_size, file_size + len(skip), planted)) + + +if __name__ == "__main__": + main() diff --git a/clang/test/Driver/clang-offload-bundler-magic-collision.c b/clang/test/Driver/clang-offload-bundler-magic-collision.c new file mode 100644 index 0000000000000..97863162647a4 --- /dev/null +++ b/clang/test/Driver/clang-offload-bundler-magic-collision.c @@ -0,0 +1,27 @@ +// REQUIRES: zstd + +// Regression test for a magic-byte collision in the compressed offload bundle +// (CCOB) reader. To find the boundary between concatenated bundles the reader +// used to scan for the literal 4-byte magic "CCOB"; those bytes can appear by +// chance inside a compressed payload, which truncated a valid single bundle and +// made decompression fail with "Src size is incorrect". +// +// Inputs/clang-offload-bundler-magic-collision.co is a single compressed bundle +// whose compressed region embeds the bytes "CCOB" inside a zstd skippable frame +// (regenerate with Inputs/clang-offload-bundler-magic-collision.py). The frame +// is ignored by the decompressor, so the bundle is valid, but a naive +// find("CCOB", 4) scan stops at the planted bytes. A reader that advances by +// the header's FileSize handles this file correctly. + +// RUN: clang-offload-bundler -type=bc -list \ +// RUN: -input=%S/Inputs/clang-offload-bundler-magic-collision.co \ +// RUN: | FileCheck %s --check-prefix=LIST +// LIST-DAG: hip-amdgcn-amd-amdhsa--gfx906 +// LIST-DAG: hip-amdgcn-amd-amdhsa--gfx908 + +// RUN: clang-offload-bundler -type=bc -unbundle \ +// RUN: -targets=hip-amdgcn-amd-amdhsa--gfx906 \ +// RUN: -input=%S/Inputs/clang-offload-bundler-magic-collision.co \ +// RUN: -output=%t.gfx906 +// RUN: FileCheck %s --check-prefix=EXTRACT --input-file=%t.gfx906 +// EXTRACT: Content of device file 1 diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp index 93fb2ab1affc7..cec97a330927a 100644 --- a/llvm/lib/Object/OffloadBundle.cpp +++ b/llvm/lib/Object/OffloadBundle.cpp @@ -28,6 +28,18 @@ using namespace llvm::object; static TimerGroup OffloadBundlerTimerGroup("Offload Bundler Timer Group", "Timer group for offload bundler"); +// Returns the on-disk size recorded in the compressed offload bundle header at +// the start of \p Blob, or std::nullopt if the header carries no size field. +static std::optional<size_t> getCompressedBundleSize(StringRef Blob) { + Expected<CompressedOffloadBundle::CompressedBundleHeader> HeaderOrErr = + CompressedOffloadBundle::CompressedBundleHeader::tryParse(Blob); + if (!HeaderOrErr) { + consumeError(HeaderOrErr.takeError()); + return std::nullopt; + } + return HeaderOrErr->FileSize; +} + // Extract an Offload bundle (usually a Offload Bundle) from a fat_bin // section. Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset, @@ -49,15 +61,26 @@ Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset, if (identify_magic((*Buffer).getBuffer()) == file_magic::offload_bundle_compressed) { Magic = "CCOB"; - // Decompress this bundle first. - NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size()); - if (NextbundleStart == StringRef::npos) + // Locate this bundle's end and the next bundle from the header size. + size_t CurBundleEnd; + if (std::optional<size_t> Size = + getCompressedBundleSize((*Buffer).getBuffer())) { + CurBundleEnd = *Size; + NextbundleStart = (*Buffer).getBuffer().find(Magic, *Size); + } else { + // Legacy bundle without a recorded size: fall back to magic scanning. + NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size()); + CurBundleEnd = NextbundleStart; + } + if (NextbundleStart == StringRef::npos) { NextbundleStart = (*Buffer).getBuffer().size(); + if (CurBundleEnd == StringRef::npos) + CurBundleEnd = (*Buffer).getBuffer().size(); + } ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr = MemoryBuffer::getMemBuffer( - (*Buffer).getBuffer().take_front(NextbundleStart), FileName, - false); + (*Buffer).getBuffer().take_front(CurBundleEnd), FileName, false); if (std::error_code EC = CodeOrErr.getError()) return createFileError(FileName, EC); >From 9eea8376a0fbbb6ba3f43a8fc4d1e6995c23e5fc Mon Sep 17 00:00:00 2001 From: mselehov <[email protected]> Date: Wed, 24 Jun 2026 11:12:48 -0500 Subject: [PATCH 2/3] [OffloadBundler] Format magic-collision input generator with black --- .../clang-offload-bundler-magic-collision.py | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py index 07d3198165dd8..309a9af25a33b 100644 --- a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py +++ b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py @@ -36,9 +36,14 @@ def main(): if len(sys.argv) < 2: sys.exit("usage: %s <clang-offload-bundler> [output.co]" % sys.argv[0]) bundler = sys.argv[1] - out = sys.argv[2] if len(sys.argv) > 2 else \ - os.path.join(os.path.dirname(os.path.abspath(__file__)), - "clang-offload-bundler-magic-collision.co") + out = ( + sys.argv[2] + if len(sys.argv) > 2 + else os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "clang-offload-bundler-magic-collision.co", + ) + ) with tempfile.TemporaryDirectory() as d: dev1 = os.path.join(d, "dev1") @@ -49,11 +54,18 @@ def main(): f.write(b"Content of device file 2\n") base = os.path.join(d, "base.co") subprocess.run( - [bundler, "-compress", "-type=bc", - "-targets=hip-amdgcn-amd-amdhsa--gfx906," - "hip-amdgcn-amd-amdhsa--gfx908", - "-input=" + dev1, "-input=" + dev2, "-output=" + base], - check=True) + [ + bundler, + "-compress", + "-type=bc", + "-targets=hip-amdgcn-amd-amdhsa--gfx906," + "hip-amdgcn-amd-amdhsa--gfx908", + "-input=" + dev1, + "-input=" + dev2, + "-output=" + base, + ], + check=True, + ) data = bytearray(open(base, "rb").read()) if data[:4] != b"CCOB": @@ -72,8 +84,7 @@ def main(): # Splice a zstd skippable frame carrying "CCOB" right after the header, so # the planted magic lands inside the compressed region. - skip = struct.pack("<II", ZSTD_SKIPPABLE_MAGIC, len(PLANTED_MAGIC)) \ - + PLANTED_MAGIC + skip = struct.pack("<II", ZSTD_SKIPPABLE_MAGIC, len(PLANTED_MAGIC)) + PLANTED_MAGIC data = data[:header_size] + skip + data[header_size:] struct.pack_into(fs_fmt, data, fs_off, file_size + len(skip)) @@ -83,8 +94,10 @@ def main(): with open(out, "wb") as f: f.write(data) - print("wrote %s: version=%d header=%d FileSize=%d planted 'CCOB' at %d" % - (out, version, header_size, file_size + len(skip), planted)) + print( + "wrote %s: version=%d header=%d FileSize=%d planted 'CCOB' at %d" + % (out, version, header_size, file_size + len(skip), planted) + ) if __name__ == "__main__": >From e6e2ff875b34e744eec316cb4c5dd709e260269a Mon Sep 17 00:00:00 2001 From: mselehov <[email protected]> Date: Thu, 25 Jun 2026 04:34:51 -0500 Subject: [PATCH 3/3] [Offload] Add llvm-objdump --offloading test for CCOB magic-collision fix --- .../Offloading/fatbin-magic-collision.test | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 llvm/test/tools/llvm-objdump/Offloading/fatbin-magic-collision.test diff --git a/llvm/test/tools/llvm-objdump/Offloading/fatbin-magic-collision.test b/llvm/test/tools/llvm-objdump/Offloading/fatbin-magic-collision.test new file mode 100644 index 0000000000000..3f1882dc8f689 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/Offloading/fatbin-magic-collision.test @@ -0,0 +1,35 @@ +## Regression test for a magic-byte collision in the compressed offload bundle +## (CCOB) reader used by --offloading (llvm::object::extractOffloadBundle). +## +## The reader used to find the boundary between concatenated bundles by scanning +## for the literal 4-byte magic "CCOB". Those bytes can appear by chance inside a +## compressed payload, which truncated a valid single bundle so it failed to +## decompress. The .hip_fatbin below is a single compressed bundle whose +## compressed region embeds the bytes "CCOB" inside a zstd skippable frame +## (ignored by the decompressor); a reader that advances by the header's FileSize +## handles it correctly. Regenerate the bundle with +## clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py. + +# REQUIRES: zstd +# RUN: yaml2obj %s -o %t.elf +# RUN: llvm-objdump --offloading %t.elf | FileCheck %s +# RUN: FileCheck %s --check-prefix=DEV1 \ +# RUN: --input-file=%t.elf.0.hip-amdgcn-amd-amdhsa--gfx906 + +# CHECK: Extracting offload bundle: {{.*}}.elf.0.hip-amdgcn-amd-amdhsa--gfx906 +# CHECK: Extracting offload bundle: {{.*}}.elf.0.hip-amdgcn-amd-amdhsa--gfx908 +# DEV1: Content of device file 1 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .hip_fatbin + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + AddressAlign: 0x1000 + Content: 43434F42030001009D00000000000000BC00000000000000C4C20F3D5905B03E502A4D180400000043434F4228B52FFD20BC45030024055F5F434C414E475F4F46464C4F41445F42554E444C455F5F02008A0019001D006869702D616D6467636E6873612D2D676678393036A338436F6E74656E74206F66206465766963652066696C6520310A320A0800F2AC07B03ADAED52591D600C0606033B8031 +... _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
