llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-driver
Author: michaelselehov
<details>
<summary>Changes</summary>
The compressed offload bundle (CCOB) readers located the boundary between
concatenated bundles by scanning for the literal 4-byte magic "CCOB". Those
bytes can appear by chance inside a compressed payload, so a single valid
bundle could be truncated at the spurious magic and then fail to decompress
with "Src size is incorrect". At runtime this surfaced as hipErrorInvalidImage
("device kernel image is invalid") when loading affected HIP code objects.
Use the authoritative FileSize recorded in the compressed bundle header
(CompressedBundleHeader::tryParse, present for V2/V3) to delimit the current
bundle, and search for the next bundle's "CCOB" magic only past that point.
This keeps multi-bundle iteration working (and tolerant of inter-bundle
padding) while ignoring magic-byte collisions inside the payload. Bundles
without a recorded size (legacy V1) fall back to the previous magic scan.
Fixed in all three readers: clang's ListBundleIDsInFile and UnbundleFiles, and
Object's extractOffloadBundle.
The regression test embeds the bytes "CCOB" inside a zstd skippable frame in a
single compressed bundle's payload: the decompressor ignores the frame, but the
old magic scan stopped at it.
---
Full diff: https://github.com/llvm/llvm-project/pull/205587.diff
5 Files Affected:
- (modified) clang/lib/Driver/OffloadBundler.cpp (+37-4)
- (added) clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co ()
- (added) clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py
(+91)
- (added) clang/test/Driver/clang-offload-bundler-magic-collision.c (+27)
- (modified) llvm/lib/Object/OffloadBundle.cpp (+28-5)
``````````diff
diff --git a/clang/lib/Driver/OffloadBundler.cpp
b/clang/lib/Driver/OffloadBundler.cpp
index 9d6f32c4a4e8f..70b5668c35a30 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -1343,6 +1343,18 @@ CompressedOffloadBundle::decompress(const
llvm::MemoryBuffer &Input,
llvm::toStringRef(DecompressedData));
}
+// Returns the on-disk size recorded in the compressed offload bundle header at
+// the start of \p Blob, or std::nullopt if the header carries no size field.
+static std::optional<size_t> getCompressedBundleSize(StringRef Blob) {
+ Expected<CompressedOffloadBundle::CompressedBundleHeader> HeaderOrErr =
+ CompressedOffloadBundle::CompressedBundleHeader::tryParse(Blob);
+ if (!HeaderOrErr) {
+ consumeError(HeaderOrErr.takeError());
+ return std::nullopt;
+ }
+ return HeaderOrErr->FileSize;
+}
+
// List bundle IDs. Return true if an error was found.
Error OffloadBundler::ListBundleIDsInFile(
StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
@@ -1364,15 +1376,25 @@ Error OffloadBundler::ListBundleIDsInFile(
(**Contents).getBuffer().drop_front(Offset), "",
/*RequiresNullTerminator=*/false);
+ size_t CurBundleEnd = StringRef::npos;
if (identify_magic((*Buffer).getBuffer()) ==
file_magic::offload_bundle_compressed) {
- NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4);
+ // Locate this bundle's end and the next bundle from the header size.
+ if (std::optional<size_t> Size =
+ getCompressedBundleSize((*Buffer).getBuffer())) {
+ CurBundleEnd = *Size;
+ NextBundleStart = (*Buffer).getBuffer().find("CCOB", *Size);
+ } else {
+ // Legacy bundle without a recorded size: fall back to magic scanning.
+ NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4);
+ CurBundleEnd = NextBundleStart;
+ }
} else
NextBundleStart = StringRef::npos;
ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
MemoryBuffer::getMemBuffer(
- (*Buffer).getBuffer().take_front(NextBundleStart),
+ (*Buffer).getBuffer().take_front(CurBundleEnd),
InputFileName, // FileName,
false);
if (std::error_code EC = CodeOrErr.getError())
@@ -1613,19 +1635,30 @@ Error OffloadBundler::UnbundleFiles() {
(**CodeOrErr).getBuffer().drop_front(Offset), "",
/*RequiresNullTerminator=*/false);
+ size_t CurBundleEnd = StringRef::npos;
if (identify_magic((*Buffer).getBuffer()) ==
file_magic::offload_bundle_compressed) {
- NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4);
+ // Locate this bundle's end and the next bundle from the header size.
+ if (std::optional<size_t> Size =
+ getCompressedBundleSize((*Buffer).getBuffer())) {
+ CurBundleEnd = *Size;
+ NextBundleStart = (*Buffer).getBuffer().find("CCOB", *Size);
+ } else {
+ // Legacy bundle without a recorded size: fall back to magic scanning.
+ NextBundleStart = (*Buffer).getBuffer().find("CCOB", 4);
+ CurBundleEnd = NextBundleStart;
+ }
} else if (identify_magic((*Buffer).getBuffer()) ==
file_magic::offload_bundle) {
NextBundleStart = (*Buffer).getBuffer().find(
OFFLOAD_BUNDLER_MAGIC_STR, sizeof(OFFLOAD_BUNDLER_MAGIC_STR));
+ CurBundleEnd = NextBundleStart;
} else
NextBundleStart = StringRef::npos;
ErrorOr<std::unique_ptr<MemoryBuffer>> BlobOrErr =
MemoryBuffer::getMemBuffer(
- (*Buffer).getBuffer().take_front(NextBundleStart),
+ (*Buffer).getBuffer().take_front(CurBundleEnd),
BundlerConfig.InputFileNames.front(),
/*RequiresNullTerminator=*/false);
if (std::error_code EC = BlobOrErr.getError())
diff --git a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co
b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co
new file mode 100644
index 0000000000000..4a7c7e90ab084
Binary files /dev/null and
b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.co differ
diff --git a/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py
b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py
new file mode 100644
index 0000000000000..07d3198165dd8
--- /dev/null
+++ b/clang/test/Driver/Inputs/clang-offload-bundler-magic-collision.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""Regenerate clang-offload-bundler-magic-collision.co.
+
+This input reproduces a magic-byte collision in the compressed offload bundle
+(CCOB) reader. The reader used to find the boundary between concatenated
+bundles by scanning for the literal 4-byte magic "CCOB"; because that magic can
+appear by chance inside a compressed payload, the reader could truncate a valid
+single bundle and then fail to decompress it.
+
+To make the collision deterministic we take a real compressed bundle produced
+by clang-offload-bundler and splice a zstd *skippable frame* whose body is the
+bytes "CCOB" into the compressed region. A zstd skippable frame is ignored by
+the decompressor, so the bundle still decompresses correctly, but a naive
+find("CCOB", 4) scan stops at the planted bytes.
+
+The bundle's header FileSize is updated so it remains authoritative. A reader
+that advances by FileSize handles this file correctly; a reader that scans for
+"CCOB" truncates and fails.
+
+Usage:
+ clang-offload-bundler-magic-collision.py <clang-offload-bundler> [output.co]
+"""
+
+import os
+import struct
+import subprocess
+import sys
+import tempfile
+
+# zstd skippable frame magic numbers are 0x184D2A50..0x184D2A5F.
+ZSTD_SKIPPABLE_MAGIC = 0x184D2A50
+PLANTED_MAGIC = b"CCOB"
+
+
+def main():
+ if len(sys.argv) < 2:
+ sys.exit("usage: %s <clang-offload-bundler> [output.co]" % sys.argv[0])
+ bundler = sys.argv[1]
+ out = sys.argv[2] if len(sys.argv) > 2 else \
+ os.path.join(os.path.dirname(os.path.abspath(__file__)),
+ "clang-offload-bundler-magic-collision.co")
+
+ with tempfile.TemporaryDirectory() as d:
+ dev1 = os.path.join(d, "dev1")
+ dev2 = os.path.join(d, "dev2")
+ with open(dev1, "wb") as f:
+ f.write(b"Content of device file 1\n")
+ with open(dev2, "wb") as f:
+ f.write(b"Content of device file 2\n")
+ base = os.path.join(d, "base.co")
+ subprocess.run(
+ [bundler, "-compress", "-type=bc",
+ "-targets=hip-amdgcn-amd-amdhsa--gfx906,"
+ "hip-amdgcn-amd-amdhsa--gfx908",
+ "-input=" + dev1, "-input=" + dev2, "-output=" + base],
+ check=True)
+ data = bytearray(open(base, "rb").read())
+
+ if data[:4] != b"CCOB":
+ sys.exit("unexpected magic in compressed bundle")
+ version = struct.unpack_from("<H", data, 4)[0]
+ if version == 2:
+ header_size, fs_off, fs_fmt = 24, 8, "<I"
+ elif version == 3:
+ header_size, fs_off, fs_fmt = 32, 8, "<Q"
+ else:
+ sys.exit("unsupported compressed bundle version %d" % version)
+
+ file_size = struct.unpack_from(fs_fmt, data, fs_off)[0]
+ if file_size != len(data):
+ sys.exit("FileSize %d != file length %d" % (file_size, len(data)))
+
+ # Splice a zstd skippable frame carrying "CCOB" right after the header, so
+ # the planted magic lands inside the compressed region.
+ skip = struct.pack("<II", ZSTD_SKIPPABLE_MAGIC, len(PLANTED_MAGIC)) \
+ + PLANTED_MAGIC
+ data = data[:header_size] + skip + data[header_size:]
+ struct.pack_into(fs_fmt, data, fs_off, file_size + len(skip))
+
+ planted = bytes(data).find(PLANTED_MAGIC, 4)
+ if planted != header_size + 8:
+ sys.exit("planted magic at unexpected offset %d" % planted)
+
+ with open(out, "wb") as f:
+ f.write(data)
+ print("wrote %s: version=%d header=%d FileSize=%d planted 'CCOB' at %d" %
+ (out, version, header_size, file_size + len(skip), planted))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/clang/test/Driver/clang-offload-bundler-magic-collision.c
b/clang/test/Driver/clang-offload-bundler-magic-collision.c
new file mode 100644
index 0000000000000..97863162647a4
--- /dev/null
+++ b/clang/test/Driver/clang-offload-bundler-magic-collision.c
@@ -0,0 +1,27 @@
+// REQUIRES: zstd
+
+// Regression test for a magic-byte collision in the compressed offload bundle
+// (CCOB) reader. To find the boundary between concatenated bundles the reader
+// used to scan for the literal 4-byte magic "CCOB"; those bytes can appear by
+// chance inside a compressed payload, which truncated a valid single bundle
and
+// made decompression fail with "Src size is incorrect".
+//
+// Inputs/clang-offload-bundler-magic-collision.co is a single compressed
bundle
+// whose compressed region embeds the bytes "CCOB" inside a zstd skippable
frame
+// (regenerate with Inputs/clang-offload-bundler-magic-collision.py). The frame
+// is ignored by the decompressor, so the bundle is valid, but a naive
+// find("CCOB", 4) scan stops at the planted bytes. A reader that advances by
+// the header's FileSize handles this file correctly.
+
+// RUN: clang-offload-bundler -type=bc -list \
+// RUN: -input=%S/Inputs/clang-offload-bundler-magic-collision.co \
+// RUN: | FileCheck %s --check-prefix=LIST
+// LIST-DAG: hip-amdgcn-amd-amdhsa--gfx906
+// LIST-DAG: hip-amdgcn-amd-amdhsa--gfx908
+
+// RUN: clang-offload-bundler -type=bc -unbundle \
+// RUN: -targets=hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN: -input=%S/Inputs/clang-offload-bundler-magic-collision.co \
+// RUN: -output=%t.gfx906
+// RUN: FileCheck %s --check-prefix=EXTRACT --input-file=%t.gfx906
+// EXTRACT: Content of device file 1
diff --git a/llvm/lib/Object/OffloadBundle.cpp
b/llvm/lib/Object/OffloadBundle.cpp
index 93fb2ab1affc7..cec97a330927a 100644
--- a/llvm/lib/Object/OffloadBundle.cpp
+++ b/llvm/lib/Object/OffloadBundle.cpp
@@ -28,6 +28,18 @@ using namespace llvm::object;
static TimerGroup OffloadBundlerTimerGroup("Offload Bundler Timer Group",
"Timer group for offload bundler");
+// Returns the on-disk size recorded in the compressed offload bundle header at
+// the start of \p Blob, or std::nullopt if the header carries no size field.
+static std::optional<size_t> getCompressedBundleSize(StringRef Blob) {
+ Expected<CompressedOffloadBundle::CompressedBundleHeader> HeaderOrErr =
+ CompressedOffloadBundle::CompressedBundleHeader::tryParse(Blob);
+ if (!HeaderOrErr) {
+ consumeError(HeaderOrErr.takeError());
+ return std::nullopt;
+ }
+ return HeaderOrErr->FileSize;
+}
+
// Extract an Offload bundle (usually a Offload Bundle) from a fat_bin
// section.
Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset,
@@ -49,15 +61,26 @@ Error extractOffloadBundle(MemoryBufferRef Contents,
uint64_t SectionOffset,
if (identify_magic((*Buffer).getBuffer()) ==
file_magic::offload_bundle_compressed) {
Magic = "CCOB";
- // Decompress this bundle first.
- NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
- if (NextbundleStart == StringRef::npos)
+ // Locate this bundle's end and the next bundle from the header size.
+ size_t CurBundleEnd;
+ if (std::optional<size_t> Size =
+ getCompressedBundleSize((*Buffer).getBuffer())) {
+ CurBundleEnd = *Size;
+ NextbundleStart = (*Buffer).getBuffer().find(Magic, *Size);
+ } else {
+ // Legacy bundle without a recorded size: fall back to magic scanning.
+ NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
+ CurBundleEnd = NextbundleStart;
+ }
+ if (NextbundleStart == StringRef::npos) {
NextbundleStart = (*Buffer).getBuffer().size();
+ if (CurBundleEnd == StringRef::npos)
+ CurBundleEnd = (*Buffer).getBuffer().size();
+ }
ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
MemoryBuffer::getMemBuffer(
- (*Buffer).getBuffer().take_front(NextbundleStart), FileName,
- false);
+ (*Buffer).getBuffer().take_front(CurBundleEnd), FileName, false);
if (std::error_code EC = CodeOrErr.getError())
return createFileError(FileName, EC);
``````````
</details>
https://github.com/llvm/llvm-project/pull/205587
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits