https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/153504
>From 11b6ee49f073517b917b7be693ba673d2afe5a59 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Wed, 13 Aug 2025 17:31:18 -0500 Subject: [PATCH] [LLVM] Introduce 'llvm-offload-wrapper' tool Summary: This is a standalone tool that does the wrapper stage of the `clang-linker-wrapper`. We want this to be an external tool because currently there's no easy way to split apart what the clang-linker-wrapper is doing under the hood. With this tool, users can manually extract files with `clang-offload-packager`, feed them through `clang --target=<triple>` and then use this tool to generate a `.bc` file they can give to the linker. The goal here is to make reproducing the linker wrapper steps easier. --- clang/docs/ClangLinkerWrapper.rst | 4 + llvm/test/CMakeLists.txt | 1 + llvm/test/Other/offload-wrapper.ll | 54 +++++++ .../tools/llvm-offload-wrapper/CMakeLists.txt | 15 ++ .../llvm-offload-wrapper.cpp | 135 ++++++++++++++++++ 5 files changed, 209 insertions(+) create mode 100644 llvm/test/Other/offload-wrapper.ll create mode 100644 llvm/tools/llvm-offload-wrapper/CMakeLists.txt create mode 100644 llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp diff --git a/clang/docs/ClangLinkerWrapper.rst b/clang/docs/ClangLinkerWrapper.rst index e69cdba434c93..eb38d2b8fb5ee 100644 --- a/clang/docs/ClangLinkerWrapper.rst +++ b/clang/docs/ClangLinkerWrapper.rst @@ -60,6 +60,10 @@ only for the linker wrapper will be forwarded to the wrapped linker job. --v Display the version number and exit -- The separator for the wrapped linker arguments +The linker wrapper will generate the appropriate runtime calls to register the +generated device binary with the offloading runtime. To do this step manually we +provide the ``llvm-offload-wrapper`` utility. + Relocatable Linking =================== diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index b46f4829605a1..f6333d68a8ea5 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -118,6 +118,7 @@ set(LLVM_TEST_DEPENDS llvm-objdump llvm-opt-fuzzer llvm-opt-report + llvm-offload-wrapper llvm-otool llvm-pdbutil llvm-profdata diff --git a/llvm/test/Other/offload-wrapper.ll b/llvm/test/Other/offload-wrapper.ll new file mode 100644 index 0000000000000..7be034e0931f1 --- /dev/null +++ b/llvm/test/Other/offload-wrapper.ll @@ -0,0 +1,54 @@ +; RUN: touch %t + +; RUN: llvm-offload-wrapper --target=x86-64 -kind=hip %t -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP + +; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA" +; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ" +; HIP-NEXT: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" +; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 +; HIP-NEXT: @.hip.binary_handle = internal global ptr null +; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }] + +; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { +; HIP-NEXT: entry: +; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper) +; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8 +; HIP-NEXT: call void @.hip.globals_reg(ptr %0) +; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg) +; HIP-NEXT: ret void +; HIP-NEXT: } + +; HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" { +; HIP-NEXT: entry: +; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8 +; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0) +; HIP-NEXT: ret void +; HIP-NEXT: } + +; RUN: llvm-offload-wrapper --target=x86-64 -kind=cuda %t -o %t.bc +; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA + +; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA" +; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ" +; CUDA-NEXT: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin" +; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 +; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null +; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }] + +; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" { +; CUDA-NEXT: entry: +; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper) +; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8 +; CUDA-NEXT: call void @.cuda.globals_reg(ptr %0) +; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0) +; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg) +; CUDA-NEXT: ret void +; CUDA-NEXT: } + +; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" { +; CUDA-NEXT: entry: +; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8 +; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0) +; CUDA-NEXT: ret void +; CUDA-NEXT: } diff --git a/llvm/tools/llvm-offload-wrapper/CMakeLists.txt b/llvm/tools/llvm-offload-wrapper/CMakeLists.txt new file mode 100644 index 0000000000000..2e2cdb53b5b41 --- /dev/null +++ b/llvm/tools/llvm-offload-wrapper/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LLVM_LINK_COMPONENTS + BitWriter + Object + Option + FrontendOffloading + Support + TargetParser + ) + +add_llvm_tool(llvm-offload-wrapper + llvm-offload-wrapper.cpp + + DEPENDS + intrinsics_gen + ) diff --git a/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp new file mode 100644 index 0000000000000..f9ac3d717979d --- /dev/null +++ b/llvm/tools/llvm-offload-wrapper/llvm-offload-wrapper.cpp @@ -0,0 +1,135 @@ +//===- llvm-offload-wrapper: Create runtime registration code for devices -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Provides a utility for generating runtime registration code for device code. +// We take a binary image (CUDA fatbinary, HIP offload bundle, LLVM binary) and +// create a new IR module that calls the respective runtime to load it on the +// device. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/Frontend/Offloading/OffloadWrapper.h" +#include "llvm/Frontend/Offloading/Utility.h" +#include "llvm/Object/OffloadBinary.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/WithColor.h" +#include "llvm/TargetParser/Host.h" + +using namespace llvm; + +static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden); + +static cl::OptionCategory + OffloadWrapeprCategory("llvm-offload-wrapper options"); + +static cl::opt<object::OffloadKind> Kind( + "kind", cl::desc("Wrap for offload kind:"), cl::cat(OffloadWrapeprCategory), + cl::Required, + cl::values(clEnumValN(object::OFK_OpenMP, "openmp", "Wrap OpenMP binaries"), + clEnumValN(object::OFK_Cuda, "cuda", "Wrap CUDA binaries"), + clEnumValN(object::OFK_HIP, "hip", "Wrap HIP binaries"))); + +static cl::opt<std::string> OutputFile("o", cl::desc("Write output to <file>."), + cl::value_desc("file"), + cl::cat(OffloadWrapeprCategory)); + +static cl::list<std::string> InputFiles(cl::Positional, + cl::desc("Wrap input from <file>"), + cl::value_desc("file"), cl::OneOrMore, + cl::cat(OffloadWrapeprCategory)); + +static cl::opt<std::string> + TheTriple("triple", cl::desc("Target triple for the wrapper module"), + cl::init(sys::getDefaultTargetTriple()), + cl::cat(OffloadWrapeprCategory)); + +static Error wrapImages(ArrayRef<ArrayRef<char>> BuffersToWrap) { + if (BuffersToWrap.size() > 1 && + (Kind == llvm::object::OFK_Cuda || Kind == llvm::object::OFK_HIP)) + return createStringError( + "CUDA / HIP offloading uses a single fatbinary or offload bundle"); + + LLVMContext Context; + Module M("offload.wrapper.module", Context); + M.setTargetTriple(Triple()); + + switch (Kind) { + case llvm::object::OFK_OpenMP: + if (Error Err = offloading::wrapOpenMPBinaries( + M, BuffersToWrap, offloading::getOffloadEntryArray(M), + /*Suffix=*/"", /*Relocatable=*/false)) + return std::move(Err); + break; + case llvm::object::OFK_Cuda: + if (Error Err = offloading::wrapCudaBinary( + M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M), + /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false)) + return std::move(Err); + break; + case llvm::object::OFK_HIP: + if (Error Err = offloading::wrapHIPBinary( + M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M))) + return std::move(Err); + break; + default: + return createStringError(getOffloadKindName(Kind) + + " wrapping is not supported"); + } + + int FD = -1; + if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD)) + return errorCodeToError(EC); + llvm::raw_fd_ostream OS(FD, true); + WriteBitcodeToFile(M, OS); + + return Error::success(); +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + cl::HideUnrelatedOptions(OffloadWrapeprCategory); + cl::ParseCommandLineOptions( + argc, argv, + "Generate runtime registration code for a device binary image\n"); + + if (Help) { + cl::PrintHelpMessage(); + return EXIT_SUCCESS; + } + + auto reportError = [argv](Error E) { + logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0])); + exit(EXIT_FAILURE); + }; + + SmallVector<std::unique_ptr<MemoryBuffer>> Buffers; + SmallVector<ArrayRef<char>> BuffersToWrap; + for (StringRef Input : InputFiles) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(Input); + if (std::error_code EC = BufferOrErr.getError()) + reportError(createFileError(Input, EC)); + std::unique_ptr<MemoryBuffer> &Buffer = + Buffers.emplace_back(std::move(*BufferOrErr)); + BuffersToWrap.emplace_back( + ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize())); + } + + if (Error Err = wrapImages(BuffersToWrap)) + reportError(std::move(Err)); + + return EXIT_SUCCESS; +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits