https://github.com/YuriPlyakhin updated https://github.com/llvm/llvm-project/pull/197571
>From 878b84984b21d97e4984f64cc4b878f75aa96197 Mon Sep 17 00:00:00 2001 From: "Plyakhin, Yury" <[email protected]> Date: Wed, 13 May 2026 02:36:37 +0200 Subject: [PATCH] [clang-sycl-linker] Add per-translation-unit device code split mode Add SPLIT_PER_TU mode that groups kernels by their "sycl-module-id" attribute value (i.e., by source translation unit). Make it the default split mode, replacing SPLIT_NONE. Also fix a potential buffer invalidation bug in sycl::writeSymbolTable where appending symbol names could reallocate the output buffer while pointers into it were still live. Co-Authored-By: Claude clean up --- .../Tooling/clang-sycl-linker-split-mode.ll | 65 ++++-- clang/test/Tooling/clang-sycl-linker.ll | 13 +- .../clang-sycl-linker/ClangSYCLLinker.cpp | 188 ++++++++++++------ clang/tools/clang-sycl-linker/SYCLLinkOpts.td | 8 +- llvm/lib/Frontend/Offloading/Utility.cpp | 8 +- 5 files changed, 187 insertions(+), 95 deletions(-) diff --git a/clang/test/Tooling/clang-sycl-linker-split-mode.ll b/clang/test/Tooling/clang-sycl-linker-split-mode.ll index 2b4b1cee4e171..7402b50549239 100644 --- a/clang/test/Tooling/clang-sycl-linker-split-mode.ll +++ b/clang/test/Tooling/clang-sycl-linker-split-mode.ll @@ -9,43 +9,70 @@ ; RUN: | FileCheck %s --check-prefix=SPLIT-INVALID ; SPLIT-INVALID: module-split-mode value isn't recognized: bogus ; -; Test the split mode ("none"): no extra splits are produced. +; Test the split mode ("none"): kernels from different TUs are not split into separate images. ; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none %t.bc -o %t-none.out 2>&1 \ ; RUN: | FileCheck %s --check-prefix=SPLIT-NONE ; SPLIT-NONE: sycl-device-link: inputs: {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -; SPLIT-NONE-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none ; SPLIT-NONE-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv -; SPLIT-NONE-NOT: LLVM backend: input: {{.*}}.bc, output: {{.*}}_1.spv +; SPLIT-NONE-NOT: {{.+}} ; -; Test per-kernel split: a module with two SPIR_KERNEL functions produces two -; device images. +; Test the split mode ("kernel"): each SPIR_KERNEL function produces its own device image. ; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=kernel %t.bc -o %t-split-kernel.out 2>&1 \ ; RUN: | FileCheck %s --check-prefix=SPLIT-KERNEL ; SPLIT-KERNEL: sycl-device-link: inputs: {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -; SPLIT-KERNEL-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[SPLIT0:.*]].bc, [[SPLIT1:.*]].bc, mode: kernel +; SPLIT-KERNEL-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, mode: kernel +; SPLIT-KERNEL-NEXT: [[SPLIT0:.*]].bc [kernel_c ] +; SPLIT-KERNEL-NEXT: [[SPLIT1:.*]].bc [kernel_b ] +; SPLIT-KERNEL-NEXT: [[SPLIT2:.*]].bc [kernel_a ] ; SPLIT-KERNEL-NEXT: LLVM backend: input: [[SPLIT0]].bc, output: {{.*}}_0.spv ; SPLIT-KERNEL-NEXT: LLVM backend: input: [[SPLIT1]].bc, output: {{.*}}_1.spv +; SPLIT-KERNEL-NEXT: LLVM backend: input: [[SPLIT2]].bc, output: {{.*}}_2.spv +; +; Test default split mode ('source'): no --module-split-mode flag needed. +; Two kernels with different sycl-module-id values produce two device images. +; sycl_external function is not treated as entry point and doesn't produce a separate image +; despite having a different sycl-module-id. +; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t.bc -o %t-src.out 2>&1 \ +; RUN: | FileCheck %s --check-prefix=SPLIT-SRC +; +; Test per-TU split ('source' explicitly provided) +; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=source %t.bc -o %t-src.out 2>&1 \ +; RUN: | FileCheck %s --check-prefix=SPLIT-SRC +; SPLIT-SRC: sycl-device-link: inputs: {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +; SPLIT-SRC-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, mode: source +; SPLIT-SRC-NEXT: [[S0:.*]].bc [kernel_b kernel_c ] +; SPLIT-SRC-NEXT: [[S1:.*]].bc [kernel_a ] +; SPLIT-SRC-NEXT: LLVM backend: input: [[S0]].bc, output: {{.*}}_0.spv +; SPLIT-SRC-NEXT: LLVM backend: input: [[S1]].bc, output: {{.*}}_1.spv +; SPLIT-SRC-NOT: {{.+}} target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" target triple = "spirv64" -define spir_func i32 @helper_shared(i32 %a) { -entry: - %r = add nsw i32 %a, 1 - ret i32 %r +define spir_func i32 @helper() { + ret i32 0 +} + +define spir_kernel void @kernel_a() #0 { + %r = call spir_func i32 @helper() + ret void } -define spir_kernel void @kernel_a(ptr addrspace(1) %out, i32 %a) { -entry: - %r = tail call spir_func i32 @helper_shared(i32 %a) - store i32 %r, ptr addrspace(1) %out, align 4 +define spir_kernel void @kernel_b() #1 { + %r = call spir_func i32 @helper() ret void } -define spir_kernel void @kernel_b(ptr addrspace(1) %out, i32 %a, i32 %b) { -entry: - %h = tail call spir_func i32 @helper_shared(i32 %a) - %r = mul nsw i32 %h, %b - store i32 %r, ptr addrspace(1) %out, align 4 +define spir_kernel void @kernel_c() #1 { + %r = call spir_func i32 @helper() ret void } + +define spir_func i32 @ext_fn() #2 { + %r = call spir_func i32 @helper() + ret i32 0 +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } +attributes #1 = { "sycl-module-id"="TU2.cpp" } +attributes #2 = { "sycl-module-id"="TU3.cpp" } diff --git a/clang/test/Tooling/clang-sycl-linker.ll b/clang/test/Tooling/clang-sycl-linker.ll index cf0fb33d1bc06..958dc031b6899 100644 --- a/clang/test/Tooling/clang-sycl-linker.ll +++ b/clang/test/Tooling/clang-sycl-linker.ll @@ -7,11 +7,11 @@ ; RUN: llvm-as %t/input2.ll -o %t/input2.bc ; ; Test the dry run of a simple case to link two input files. -; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t/input1.bc %t/input2.bc -o %t/spirv.out 2>&1 \ +; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none %t/input1.bc %t/input2.bc -o %t/spirv.out 2>&1 \ ; RUN: | FileCheck %s --check-prefix=SIMPLE-FO ; SIMPLE-FO: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -; SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none ; SIMPLE-FO-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv +; SIMPLE-FO-NOT: {{.+}} ; ; Test that IMG_SPIRV image kind is set for non-AOT compilation. ; RUN: llvm-objdump --offloading %t/spirv.out | FileCheck %s --check-prefix=IMAGE-KIND-SPIRV @@ -21,10 +21,9 @@ ; RUN: mkdir -p %t/libs ; RUN: touch %t/libs/lib1.bc ; RUN: touch %t/libs/lib2.bc -; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t/input1.bc %t/input2.bc --library-path=%t/libs --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 \ +; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none %t/input1.bc %t/input2.bc --library-path=%t/libs --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 \ ; RUN: | FileCheck %s --check-prefix=DEVLIBS ; DEVLIBS: sycl-device-link: inputs: {{.*}}.bc libfiles: {{.*}}lib1.bc, {{.*}}lib2.bc output: [[LLVMLINKOUT:.*]].bc -; DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none ; DEVLIBS-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: a_0.spv ; ; Test a simple case with a random file (not bitcode) as input. @@ -42,11 +41,10 @@ ; DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found ; ; Test AOT compilation for an Intel GPU. -; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t/input1.bc %t/input2.bc -o %t/aot-gpu.out 2>&1 \ +; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none -arch=bmg_g21 %t/input1.bc %t/input2.bc -o %t/aot-gpu.out 2>&1 \ ; RUN: --ocloc-options="-a -b" \ ; RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU ; AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -; AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none ; AOT-INTEL-GPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv ; AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b {{.*}}-output [[SPIRVTRANSLATIONOUT]]_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv ; @@ -55,11 +53,10 @@ ; IMAGE-KIND-OBJECT: kind elf ; ; Test AOT compilation for an Intel CPU. -; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=graniterapids %t/input1.bc %t/input2.bc -o %t/aot-cpu.out 2>&1 \ +; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none -arch=graniterapids %t/input1.bc %t/input2.bc -o %t/aot-cpu.out 2>&1 \ ; RUN: --opencl-aot-options="-a -b" \ ; RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU ; AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -; AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none ; AOT-INTEL-CPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv ; AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o [[SPIRVTRANSLATIONOUT]]_0.out [[SPIRVTRANSLATIONOUT]]_0.spv ; diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 5a525d263427d..9c29f1b9f9fb4 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -468,20 +468,36 @@ static Error runAOTCompile(StringRef InputFile, StringRef OutputFile, return createStringError(inconvertibleErrorCode(), "Unsupported arch"); } +static constexpr char AttrSYCLModuleId[] = "sycl-module-id"; + /// SYCL device code module split mode. enum class IRSplitMode { + SPLIT_PER_TU, // one module per translation unit SPLIT_PER_KERNEL, // one module per kernel SPLIT_NONE // no splitting }; -/// Parses the value of \p -module-split-mode. +/// Parses the value of \p --module-split-mode. static std::optional<IRSplitMode> convertStringToSplitMode(StringRef S) { return StringSwitch<std::optional<IRSplitMode>>(S) + .Case("source", IRSplitMode::SPLIT_PER_TU) .Case("kernel", IRSplitMode::SPLIT_PER_KERNEL) .Case("none", IRSplitMode::SPLIT_NONE) .Default(std::nullopt); } +static StringRef splitModeToString(IRSplitMode Mode) { + switch (Mode) { + case IRSplitMode::SPLIT_PER_TU: + return "source"; + case IRSplitMode::SPLIT_PER_KERNEL: + return "kernel"; + case IRSplitMode::SPLIT_NONE: + return "none"; + } + llvm_unreachable("bad split mode"); +} + /// Result of splitting a device module: the bitcode file path and the /// serialized symbol table for each device image. struct SplitModule { @@ -489,63 +505,81 @@ struct SplitModule { SmallString<0> Symbols; }; -static bool isEntryPoint(const Function &F) { - return !F.isDeclaration() && F.hasKernelCallingConv(); +static bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) { + if (F.isDeclaration()) + return false; + if (F.hasKernelCallingConv()) + return true; + if (EmitOnlyKernelsAsEntryPoints) + return false; + // sycl_external functions carry the "sycl-module-id" attribute. + // This branch is not reachable while EmitOnlyKernelsAsEntryPoints is + // hardcoded to true (see TODO in runSYCLLink). + return F.hasFnAttribute(AttrSYCLModuleId); } -/// Collect kernel names from \p M and serialize them into a symbol table. -static SmallString<0> collectSymbols(const Module &M) { - SmallVector<StringRef> KernelNames; +/// Collect entry point names from \p M and serialize them into a symbol table. +static SmallString<0> collectEntryPoints(const Module &M, + bool EmitOnlyKernelsAsEntryPoints) { + SmallVector<StringRef> Names; for (const Function &F : M) - if (isEntryPoint(F)) - KernelNames.push_back(F.getName()); + if (isEntryPoint(F, EmitOnlyKernelsAsEntryPoints)) + Names.push_back(F.getName()); SmallString<0> SymbolData; - llvm::offloading::sycl::writeSymbolTable(KernelNames, SymbolData); + llvm::offloading::sycl::writeSymbolTable(Names, SymbolData); return SymbolData; } -/// Splits the fully linked device \p M into one bitcode file per device image -/// according to \p Mode and returns the list of split images with their symbol -/// tables. -/// -/// For SPLIT_NONE, \p LinkedBitcodeFile is returned as-is. -/// For SPLIT_PER_KERNEL, the module is split into parts such that each part -/// contains exactly one kernel entry point and its transitive dependencies; -/// each part is written to a fresh temporary bitcode file. -static Expected<SmallVector<SplitModule, 0>> -splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile, - IRSplitMode Mode, const ArgList &Args) { - SmallVector<SplitModule, 0> SplitModules; +/// Functor passed to splitModuleTransitiveFromEntryPoints. For each input \p F, +/// returns a numeric group ID (if \p F is an entry point) determining which +/// device image it lands in, or std::nullopt (for non-entry-points). +/// SPLIT_PER_KERNEL \p Mode gives each kernel its own ID; +/// SPLIT_PER_TU \p Mode groups kernels by their "sycl-module-id" attribute +/// value. +class EntryPointCategorizer { +public: + EntryPointCategorizer(IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints) + : Mode(Mode), OnlyKernelsAreEntryPoints(EmitOnlyKernelsAsEntryPoints) {} - if (Mode == IRSplitMode::SPLIT_NONE) { - SplitModules.push_back( - {SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)}); - return SplitModules; - } + std::optional<int> operator()(const Function &F) { + if (!isEntryPoint(F, OnlyKernelsAreEntryPoints)) + return std::nullopt; - assert(Mode == IRSplitMode::SPLIT_PER_KERNEL); + std::string Key; + switch (Mode) { + case IRSplitMode::SPLIT_PER_KERNEL: + Key = F.getName().str(); + break; + case IRSplitMode::SPLIT_PER_TU: + Key = F.getFnAttribute(AttrSYCLModuleId).getValueAsString().str(); + break; + case IRSplitMode::SPLIT_NONE: + llvm_unreachable("categorizer cannot be used for SPLIT_NONE"); + } - // splitModuleTransitiveFromEntryPoints asserts that at least one entry point - // was categorized. If the linked module contains no kernel definitions at - // all, there is nothing to split; fall back to shipping the linked module - // as a single image. - bool HasKernel = llvm::any_of(M->functions(), isEntryPoint); - if (!HasKernel) { - SplitModules.push_back( - {SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)}); - return SplitModules; + auto [It, Inserted] = + StrToId.try_emplace(std::move(Key), static_cast<int>(StrToId.size())); + return It->second; } - // Categorize each kernel function into its own group. Non-kernels and - // declarations return std::nullopt so they are pulled into whichever split - // transitively needs them. - int NextCategory = 0; - auto EntryPointCategorizer = - [&NextCategory](const Function &F) -> std::optional<int> { - if (!isEntryPoint(F)) - return std::nullopt; - return NextCategory++; - }; +private: + IRSplitMode Mode; + bool OnlyKernelsAreEntryPoints; + llvm::StringMap<int> StrToId; +}; + +/// Splits the fully linked device \p M into one bitcode file per device image +/// according to \p Mode and returns the list of split images with their symbol +/// tables. The module is split transitively from entry points; each part is +/// written to a fresh temporary bitcode file. +static Expected<SmallVector<SplitModule, 0>> +splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile, + IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints, + const ArgList &Args) { + assert(Mode != IRSplitMode::SPLIT_NONE && "SPLIT_NONE is unsupported"); + + SmallVector<SplitModule, 0> SplitModules; + EntryPointCategorizer Categorizer(Mode, EmitOnlyKernelsAsEntryPoints); auto SplitCallback = [&](std::unique_ptr<Module> Part) -> Error { Expected<StringRef> BitcodeFileOrErr = @@ -560,17 +594,40 @@ splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile, WriteBitcodeToFile(*Part, OS); SplitModules.push_back( - {SmallString<256>(*BitcodeFileOrErr), collectSymbols(*Part)}); + {SmallString<256>(*BitcodeFileOrErr), + collectEntryPoints(*Part, EmitOnlyKernelsAsEntryPoints)}); return Error::success(); }; if (Error Err = splitModuleTransitiveFromEntryPoints( - std::move(M), EntryPointCategorizer, SplitCallback)) + std::move(M), Categorizer, SplitCallback)) return Err; + if (Verbose || DryRun) { + errs() << formatv("sycl-module-split: input: {0}, mode: {1}\n", + LinkedBitcodeFile, splitModeToString(Mode)); + for (const SplitModule &SI : SplitModules) { + errs() << formatv("{0} [", SI.ModuleFilePath); + llvm::offloading::sycl::forEachSymbol( + SI.Symbols, [](StringRef Name) { errs() << Name << " "; }); + errs() << "]\n"; + } + } + return SplitModules; } +/// Returns true if module splitting can be skipped: either \p Mode is +/// SPLIT_NONE, or \p M contains no entry points (nothing to split from). +static bool checkModuleSplitCanBeSkipped(IRSplitMode Mode, const Module &M, + bool EmitOnlyKernelsAsEntryPoints) { + if (Mode == IRSplitMode::SPLIT_NONE) + return true; + return llvm::none_of(M.functions(), [&](const Function &F) { + return isEntryPoint(F, EmitOnlyKernelsAsEntryPoints); + }); +} + /// Performs the following steps: /// 1. Link input device code (user code and SYCL device library code). /// 2. Run SPIR-V code generation. @@ -586,7 +643,7 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { auto &[LinkedModule, LinkedFile] = *LinkedOrErr; // Determine the requested module split mode. - IRSplitMode SplitMode = IRSplitMode::SPLIT_NONE; + IRSplitMode SplitMode = IRSplitMode::SPLIT_PER_TU; if (Arg *A = Args.getLastArg(OPT_module_split_mode_EQ)) { std::optional<IRSplitMode> ModeOrNone = convertStringToSplitMode(A->getValue()); @@ -596,20 +653,25 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { SplitMode = *ModeOrNone; } - // Split the linked module into one or more device images. - Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr = - splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, Args); - if (!SplitModulesOrErr) - return SplitModulesOrErr.takeError(); - SmallVector<SplitModule, 0> &SplitModules = *SplitModulesOrErr; - if (Verbose) { - SmallVector<StringRef> SplitFiles; - for (const SplitModule &SI : SplitModules) - SplitFiles.push_back(SI.ModuleFilePath); - errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: {2}\n", - LinkedFile, llvm::join(SplitFiles, ", "), - SplitMode == IRSplitMode::SPLIT_PER_KERNEL ? "kernel" - : "none"); + // TODO: Expose this as a command-line option and default it to false when + // device-image dynamic linking is supported, so that sycl_external functions + // can be called across device image boundaries. + bool EmitOnlyKernelsAsEntryPoints = true; + + SmallVector<SplitModule, 0> SplitModules; + if (checkModuleSplitCanBeSkipped(SplitMode, *LinkedModule, + EmitOnlyKernelsAsEntryPoints)) { + SplitModules.push_back( + {SmallString<256>(LinkedFile), + collectEntryPoints(*LinkedModule, EmitOnlyKernelsAsEntryPoints)}); + } else { + Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr = + splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, + EmitOnlyKernelsAsEntryPoints, Args); + if (!SplitModulesOrErr) + return SplitModulesOrErr.takeError(); + + SplitModules = std::move(*SplitModulesOrErr); } bool IsAOTCompileNeeded = IsIntelOffloadArch( diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index 740c5a4783ac9..171915c29bd93 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -57,6 +57,8 @@ def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">, def module_split_mode_EQ : Joined<["--", "-"], "module-split-mode=">, Flags<[LinkerOnlyOption]>, MetaVarName<"<mode>">, - HelpText<"SYCL device code module split mode. Valid values: 'none' (default) " - "emits a single device image; 'kernel' emits one device image per " - "kernel function.">; + HelpText<"SYCL device code module split mode. Valid values:\n" + "- 'source' (default) emits one device image per translation unit that contains " + "at least one entry point;\n" + "- 'kernel' emits one device image per kernel function;\n" + "- 'none' emits a single device image.">; diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp index d689d1bb192d6..909af0237910f 100644 --- a/llvm/lib/Frontend/Offloading/Utility.cpp +++ b/llvm/lib/Frontend/Offloading/Utility.cpp @@ -462,8 +462,12 @@ void sycl::writeSymbolTable(ArrayRef<StringRef> Names, SmallString<0> &Out) { uint32_t StringDataOffset = sizeof(SymbolTableHeader) + Count * sizeof(SymbolTableEntry); - // Pre-size the output to hold the header and entry array; string data is - // appended below. + // Compute total size and reserve to prevent reallocation while writing + // entries via pointer (append() could otherwise invalidate the pointer). + uint32_t TotalSize = StringDataOffset; + for (StringRef N : Names) + TotalSize += N.size() + 1; + Out.reserve(TotalSize); Out.resize(StringDataOffset); // Write the header. _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
