https://github.com/YuriPlyakhin updated https://github.com/llvm/llvm-project/pull/196435
>From de5f17e5602c93d3aa6814078ee216dcf3687ece Mon Sep 17 00:00:00 2001 From: "Plyakhin, Yury" <[email protected]> Date: Wed, 6 May 2026 18:32:36 +0200 Subject: [PATCH 1/2] [clang-sycl-linker] Add per-translation-unit device code split mode Adds `source` split mode to `clang-sycl-linker`, driven by the `sycl-module-id` function attribute emitted by the SYCL frontend. `source` is the default mode and groups kernels by the value of their `sycl-module-id` attribute, emitting one device image per translation unit. If the linked module contains no kernels, no device image is emitted. `none` disables splitting and emits a single device image. `kernel` emits one device image per kernel function. The `EntryPointCategorizer` in `ClangSYCLLinker.cpp` is refactored into a class (instead of a stateful lambda) to support both per-kernel and per-TU modes cleanly. `llvm-split`'s `-split-by-category=module-id` is renamed to `-split-by-category=attribute` and the previously hardcoded `"module-id"` attribute name is replaced by a required `--category-attribute=<name>` CLI option. This decouples the tool from any specific attribute name. All `SplitByCategory` tests are updated accordingly. Co-Authored-By: Claude <[email protected]> --- clang/test/Driver/Inputs/SYCL/external-fn.ll | 19 +++ clang/test/Driver/Inputs/SYCL/two-modules.ll | 25 ++++ clang/test/Driver/clang-sycl-linker-test.cpp | 32 ++++- .../clang-sycl-linker/ClangSYCLLinker.cpp | 131 ++++++++++++------ clang/tools/clang-sycl-linker/SYCLLinkOpts.td | 11 +- .../complex-indirect-call-chain1.ll | 2 +- .../complex-indirect-call-chain2.ll | 2 +- .../SplitByCategory/module-split-func-ptr.ll | 2 +- .../SplitByCategory/split-by-source.ll | 2 +- .../split-with-kernel-declarations.ll | 2 +- llvm/tools/llvm-split/llvm-split.cpp | 47 ++++--- 11 files changed, 205 insertions(+), 70 deletions(-) create mode 100644 clang/test/Driver/Inputs/SYCL/external-fn.ll create mode 100644 clang/test/Driver/Inputs/SYCL/two-modules.ll diff --git a/clang/test/Driver/Inputs/SYCL/external-fn.ll b/clang/test/Driver/Inputs/SYCL/external-fn.ll new file mode 100644 index 0000000000000..b6ec0de46bdad --- /dev/null +++ b/clang/test/Driver/Inputs/SYCL/external-fn.ll @@ -0,0 +1,19 @@ +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spirv64" + +; A kernel from TU1 and a sycl_external function from TU2. + +define spir_func i32 @ext_fn(i32 %a) #1 { +entry: + %r = add nsw i32 %a, 2 + ret i32 %r +} + +define spir_kernel void @k(ptr addrspace(1) %out) #0 { +entry: + store i32 42, ptr addrspace(1) %out, align 4 + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } +attributes #1 = { "sycl-module-id"="TU2.cpp" } diff --git a/clang/test/Driver/Inputs/SYCL/two-modules.ll b/clang/test/Driver/Inputs/SYCL/two-modules.ll new file mode 100644 index 0000000000000..d63f0e6f38726 --- /dev/null +++ b/clang/test/Driver/Inputs/SYCL/two-modules.ll @@ -0,0 +1,25 @@ +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spirv64" + +define spir_func i32 @helper(i32 %a) { +entry: + %r = add nsw i32 %a, 1 + ret i32 %r +} + +define spir_kernel void @kernel_a(ptr addrspace(1) %out, i32 %a) #0 { +entry: + %r = call spir_func i32 @helper(i32 %a) + store i32 %r, ptr addrspace(1) %out, align 4 + ret void +} + +define spir_kernel void @kernel_b(ptr addrspace(1) %out, i32 %a) #1 { +entry: + %r = call spir_func i32 @helper(i32 %a) + store i32 %r, ptr addrspace(1) %out, align 4 + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } +attributes #1 = { "sycl-module-id"="TU2.cpp" } diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp index cd99d4d47b1e1..69596252efdf0 100644 --- a/clang/test/Driver/clang-sycl-linker-test.cpp +++ b/clang/test/Driver/clang-sycl-linker-test.cpp @@ -3,13 +3,14 @@ // REQUIRES: spirv-registered-target // // Test the dry run of a simple case to link two input files. -// Also verifies the default split mode ("none"). +// The input has no SYCL kernels, so the default split mode ('source') produces +// a single device image via the no-entry-point fallback. // RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_1.bc // RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_2.bc // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-spirv.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=SIMPLE-FO // SIMPLE-FO: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none +// SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: source // SIMPLE-FO-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv // // Test that IMG_SPIRV image kind is set for non-AOT compilation. @@ -17,13 +18,14 @@ // IMAGE-KIND-SPIRV: kind spir-v // // Test the dry run of a simple case with device library files specified. +// No kernels in input; default split mode ('source') produces a single image. // RUN: mkdir -p %t.dir // RUN: touch %t.dir/lib1.bc // RUN: touch %t.dir/lib2.bc // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc --library-path=%t.dir --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 \ // RUN: | FileCheck %s --check-prefix=DEVLIBS // DEVLIBS: sycl-device-link: inputs: {{.*}}.bc libfiles: {{.*}}lib1.bc, {{.*}}lib2.bc output: [[LLVMLINKOUT:.*]].bc -// DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none +// DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: source // DEVLIBS-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: a_0.spv // // Test a simple case with a random file (not bitcode) as input. @@ -41,11 +43,12 @@ // DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found // // Test AOT compilation for an Intel GPU. +// No kernels in input; default split mode ('source') produces a single image. // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc %t_2.bc -o %t-aot-gpu.out 2>&1 \ // RUN: --ocloc-options="-a -b" \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU // AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none +// AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: source // AOT-INTEL-GPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv // AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b {{.*}}-output [[SPIRVTRANSLATIONOUT]]_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv // @@ -54,11 +57,12 @@ // IMAGE-KIND-OBJECT: kind elf // // Test AOT compilation for an Intel CPU. +// No kernels in input; default split mode ('source') produces a single image. // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=graniterapids %t_1.bc %t_2.bc -o %t-aot-cpu.out 2>&1 \ // RUN: --opencl-aot-options="-a -b" \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU // AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none +// AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: source // AOT-INTEL-CPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv // AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o [[SPIRVTRANSLATIONOUT]]_0.out [[SPIRVTRANSLATIONOUT]]_0.spv // @@ -97,3 +101,21 @@ // RUN: not clang-sycl-linker --dry-run -triple=spirv64 --module-split-mode=bogus %t_1.bc -o a.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=SPLIT-INVALID // SPLIT-INVALID: module-split-mode value isn't recognized: bogus +// +// Test per-TU split: two kernels with different sycl-module-id values produce +// two device images. +// RUN: llvm-as %S/Inputs/SYCL/two-modules.ll -o %t-tu.bc +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=source %t-tu.bc -o %t-src.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=SPLIT-SRC +// SPLIT-SRC: sycl-device-link: inputs: {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc +// SPLIT-SRC-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[S0:.*]].bc, [[S1:.*]].bc, mode: source +// SPLIT-SRC-NEXT: LLVM backend: input: [[S0]].bc, output: {{.*}}_0.spv +// SPLIT-SRC-NEXT: LLVM backend: input: [[S1]].bc, output: {{.*}}_1.spv +// +// Test that sycl_external functions are not treated as entry points: a kernel +// from TU1 and a sycl_external function from TU2 produce a single image, +// since only the kernel is an entry point. +// RUN: llvm-as %S/Inputs/SYCL/external-fn.ll -o %t-ext.bc +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=source %t-ext.bc -o %t-ext.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=SPLIT-EXT-DEFAULT +// SPLIT-EXT-DEFAULT: sycl-module-split: input: {{.*}}.bc, output: [[S0:.*]].bc, mode: source diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index 5a525d263427d..af2273dd33dbb 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -468,20 +468,36 @@ static Error runAOTCompile(StringRef InputFile, StringRef OutputFile, return createStringError(inconvertibleErrorCode(), "Unsupported arch"); } +static constexpr char AttrSYCLModuleId[] = "sycl-module-id"; + /// SYCL device code module split mode. enum class IRSplitMode { + SPLIT_PER_TU, // one module per translation unit SPLIT_PER_KERNEL, // one module per kernel SPLIT_NONE // no splitting }; -/// Parses the value of \p -module-split-mode. +/// Parses the value of \p --module-split-mode. static std::optional<IRSplitMode> convertStringToSplitMode(StringRef S) { return StringSwitch<std::optional<IRSplitMode>>(S) + .Case("source", IRSplitMode::SPLIT_PER_TU) .Case("kernel", IRSplitMode::SPLIT_PER_KERNEL) .Case("none", IRSplitMode::SPLIT_NONE) .Default(std::nullopt); } +static StringRef splitModeToString(IRSplitMode Mode) { + switch (Mode) { + case IRSplitMode::SPLIT_PER_TU: + return "source"; + case IRSplitMode::SPLIT_PER_KERNEL: + return "kernel"; + case IRSplitMode::SPLIT_NONE: + return "none"; + } + llvm_unreachable("bad split mode"); +} + /// Result of splitting a device module: the bitcode file path and the /// serialized symbol table for each device image. struct SplitModule { @@ -489,63 +505,92 @@ struct SplitModule { SmallString<0> Symbols; }; -static bool isEntryPoint(const Function &F) { - return !F.isDeclaration() && F.hasKernelCallingConv(); +static bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) { + if (F.isDeclaration()) + return false; + if (F.hasKernelCallingConv()) + return true; + if (EmitOnlyKernelsAsEntryPoints) + return false; + // sycl_external functions carry the "sycl-module-id" attribute. + return F.hasFnAttribute(AttrSYCLModuleId); } -/// Collect kernel names from \p M and serialize them into a symbol table. -static SmallString<0> collectSymbols(const Module &M) { - SmallVector<StringRef> KernelNames; +/// Collect entry point names from \p M and serialize them into a symbol table. +static SmallString<0> collectSymbols(const Module &M, + bool EmitOnlyKernelsAsEntryPoints) { + SmallVector<StringRef> Names; for (const Function &F : M) - if (isEntryPoint(F)) - KernelNames.push_back(F.getName()); + if (isEntryPoint(F, EmitOnlyKernelsAsEntryPoints)) + Names.push_back(F.getName()); SmallString<0> SymbolData; - llvm::offloading::sycl::writeSymbolTable(KernelNames, SymbolData); + llvm::offloading::sycl::writeSymbolTable(Names, SymbolData); return SymbolData; } +class EntryPointCategorizer { +public: + EntryPointCategorizer(IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints) + : Mode(Mode), OnlyKernelsAreEntryPoints(EmitOnlyKernelsAsEntryPoints) {} + + std::optional<int> operator()(const Function &F) { + if (!isEntryPoint(F, OnlyKernelsAreEntryPoints)) + return std::nullopt; + + std::string Key; + switch (Mode) { + case IRSplitMode::SPLIT_PER_KERNEL: + Key = F.getName().str(); + break; + case IRSplitMode::SPLIT_PER_TU: + Key = F.getFnAttribute(AttrSYCLModuleId).getValueAsString().str(); + break; + case IRSplitMode::SPLIT_NONE: + llvm_unreachable("categorizer not used for SPLIT_NONE"); + } + + auto [It, Inserted] = + StrToId.try_emplace(std::move(Key), static_cast<int>(StrToId.size())); + return It->second; + } + +private: + IRSplitMode Mode; + bool OnlyKernelsAreEntryPoints; + llvm::StringMap<int> StrToId; +}; + /// Splits the fully linked device \p M into one bitcode file per device image /// according to \p Mode and returns the list of split images with their symbol /// tables. /// /// For SPLIT_NONE, \p LinkedBitcodeFile is returned as-is. -/// For SPLIT_PER_KERNEL, the module is split into parts such that each part -/// contains exactly one kernel entry point and its transitive dependencies; +/// For all other modes the module is split transitively from entry points; /// each part is written to a fresh temporary bitcode file. static Expected<SmallVector<SplitModule, 0>> splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile, - IRSplitMode Mode, const ArgList &Args) { + IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints, + const ArgList &Args) { SmallVector<SplitModule, 0> SplitModules; if (Mode == IRSplitMode::SPLIT_NONE) { - SplitModules.push_back( - {SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)}); + SplitModules.push_back({SmallString<256>(LinkedBitcodeFile), + collectSymbols(*M, EmitOnlyKernelsAsEntryPoints)}); return SplitModules; } - assert(Mode == IRSplitMode::SPLIT_PER_KERNEL); - - // splitModuleTransitiveFromEntryPoints asserts that at least one entry point - // was categorized. If the linked module contains no kernel definitions at - // all, there is nothing to split; fall back to shipping the linked module - // as a single image. - bool HasKernel = llvm::any_of(M->functions(), isEntryPoint); - if (!HasKernel) { - SplitModules.push_back( - {SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)}); + // splitModuleTransitiveFromEntryPoints requires at least one categorized + // entry point. Fall back to a single image if the module has none. + bool HasEntryPoint = llvm::any_of(M->functions(), [&](const Function &F) { + return isEntryPoint(F, EmitOnlyKernelsAsEntryPoints); + }); + if (!HasEntryPoint) { + SplitModules.push_back({SmallString<256>(LinkedBitcodeFile), + collectSymbols(*M, EmitOnlyKernelsAsEntryPoints)}); return SplitModules; } - // Categorize each kernel function into its own group. Non-kernels and - // declarations return std::nullopt so they are pulled into whichever split - // transitively needs them. - int NextCategory = 0; - auto EntryPointCategorizer = - [&NextCategory](const Function &F) -> std::optional<int> { - if (!isEntryPoint(F)) - return std::nullopt; - return NextCategory++; - }; + EntryPointCategorizer Categorizer(Mode, EmitOnlyKernelsAsEntryPoints); auto SplitCallback = [&](std::unique_ptr<Module> Part) -> Error { Expected<StringRef> BitcodeFileOrErr = @@ -560,12 +605,13 @@ splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile, WriteBitcodeToFile(*Part, OS); SplitModules.push_back( - {SmallString<256>(*BitcodeFileOrErr), collectSymbols(*Part)}); + {SmallString<256>(*BitcodeFileOrErr), + collectSymbols(*Part, EmitOnlyKernelsAsEntryPoints)}); return Error::success(); }; if (Error Err = splitModuleTransitiveFromEntryPoints( - std::move(M), EntryPointCategorizer, SplitCallback)) + std::move(M), Categorizer, SplitCallback)) return Err; return SplitModules; @@ -586,7 +632,7 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { auto &[LinkedModule, LinkedFile] = *LinkedOrErr; // Determine the requested module split mode. - IRSplitMode SplitMode = IRSplitMode::SPLIT_NONE; + IRSplitMode SplitMode = IRSplitMode::SPLIT_PER_TU; if (Arg *A = Args.getLastArg(OPT_module_split_mode_EQ)) { std::optional<IRSplitMode> ModeOrNone = convertStringToSplitMode(A->getValue()); @@ -596,9 +642,15 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { SplitMode = *ModeOrNone; } + // TODO: Expose this as a command-line option and default it to false when + // device-image dynamic linking is supported, so that sycl_external functions + // can be called across device image boundaries. + bool EmitOnlyKernelsAsEntryPoints = true; + // Split the linked module into one or more device images. Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr = - splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, Args); + splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, + EmitOnlyKernelsAsEntryPoints, Args); if (!SplitModulesOrErr) return SplitModulesOrErr.takeError(); SmallVector<SplitModule, 0> &SplitModules = *SplitModulesOrErr; @@ -608,8 +660,7 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { SplitFiles.push_back(SI.ModuleFilePath); errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: {2}\n", LinkedFile, llvm::join(SplitFiles, ", "), - SplitMode == IRSplitMode::SPLIT_PER_KERNEL ? "kernel" - : "none"); + splitModeToString(SplitMode)); } bool IsAOTCompileNeeded = IsIntelOffloadArch( diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index 740c5a4783ac9..c60b06573d8f3 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -57,6 +57,11 @@ def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">, def module_split_mode_EQ : Joined<["--", "-"], "module-split-mode=">, Flags<[LinkerOnlyOption]>, MetaVarName<"<mode>">, - HelpText<"SYCL device code module split mode. Valid values: 'none' (default) " - "emits a single device image; 'kernel' emits one device image per " - "kernel function.">; + HelpText<"SYCL device code module split mode. Valid values: " + "'source' (default) emits one device image per translation unit " + "that contains at least one kernel (grouped by the 'sycl-module-id' " + "attribute); translation units containing only sycl_external " + "functions do not produce a device image, this behavior may change " + "in the future; " + "'kernel' emits one device image per kernel function; " + "'none' emits a single device image.">; diff --git a/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain1.ll b/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain1.ll index 80123d4dd8fb7..50e08cc093d83 100644 --- a/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain1.ll +++ b/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain1.ll @@ -1,7 +1,7 @@ ; Check that Module splitting can trace through more complex call stacks ; involving several nested indirect calls. -; RUN: llvm-split -split-by-category=module-id -S < %s -o %t +; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id -S < %s -o %t ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \ ; RUN: --implicit-check-not @foo --implicit-check-not @kernel_A \ ; RUN: --implicit-check-not @kernel_B --implicit-check-not @baz diff --git a/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain2.ll b/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain2.ll index 0c80602f99eef..aa84c5fbf904a 100644 --- a/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain2.ll +++ b/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain2.ll @@ -1,6 +1,6 @@ ; Check that Module splitting can trace indirect calls through signatures. -; RUN: llvm-split -split-by-category=module-id -S < %s -o %t +; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id -S < %s -o %t ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \ ; RUN: --implicit-check-not @kernel_A --implicit-check-not @bbb ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix CHECK1 \ diff --git a/llvm/test/tools/llvm-split/SplitByCategory/module-split-func-ptr.ll b/llvm/test/tools/llvm-split/SplitByCategory/module-split-func-ptr.ll index 316500a4c7611..ee263fc38a893 100644 --- a/llvm/test/tools/llvm-split/SplitByCategory/module-split-func-ptr.ll +++ b/llvm/test/tools/llvm-split/SplitByCategory/module-split-func-ptr.ll @@ -1,7 +1,7 @@ ; This test checks that Module splitting can properly perform device code split by tracking ; all uses of functions (not only direct calls). -; RUN: llvm-split -split-by-category=module-id -S < %s -o %t +; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id -S < %s -o %t ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix=CHECK-IR0 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix=CHECK-IR1 diff --git a/llvm/test/tools/llvm-split/SplitByCategory/split-by-source.ll b/llvm/test/tools/llvm-split/SplitByCategory/split-by-source.ll index 54485b7b7f348..dc0cc292f50fe 100644 --- a/llvm/test/tools/llvm-split/SplitByCategory/split-by-source.ll +++ b/llvm/test/tools/llvm-split/SplitByCategory/split-by-source.ll @@ -1,7 +1,7 @@ ; Test checks that kernels are being split by attached module-id metadata and ; used functions are being moved with kernels that use them. -; RUN: llvm-split -split-by-category=module-id -S < %s -o %t +; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id -S < %s -o %t ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK diff --git a/llvm/test/tools/llvm-split/SplitByCategory/split-with-kernel-declarations.ll b/llvm/test/tools/llvm-split/SplitByCategory/split-with-kernel-declarations.ll index 0c1bd8b5c5fba..59a7a95761d9d 100644 --- a/llvm/test/tools/llvm-split/SplitByCategory/split-with-kernel-declarations.ll +++ b/llvm/test/tools/llvm-split/SplitByCategory/split-with-kernel-declarations.ll @@ -1,6 +1,6 @@ ; The test checks that Module splitting does not treat declarations as entry points. -; RUN: llvm-split -split-by-category=module-id -S < %s -o %t1 +; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id -S < %s -o %t1 ; RUN: FileCheck %s -input-file=%t1_0.ll --check-prefix CHECK-MODULE-ID0 ; RUN: FileCheck %s -input-file=%t1_1.ll --check-prefix CHECK-MODULE-ID1 diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp index a987b8c1b3eb4..68812ae7158b4 100644 --- a/llvm/tools/llvm-split/llvm-split.cpp +++ b/llvm/tools/llvm-split/llvm-split.cpp @@ -78,7 +78,7 @@ static cl::opt<std::string> cl::value_desc("cpu"), cl::cat(SplitCategory)); enum class SplitByCategoryType { - SBCT_ByModuleId, + SBCT_ByAttribute, SBCT_ByKernel, SBCT_None, }; @@ -88,13 +88,19 @@ static cl::opt<SplitByCategoryType> SplitByCategory( cl::desc("Split by category. If present, splitting by category is used " "with the specified categorization type."), cl::Optional, cl::init(SplitByCategoryType::SBCT_None), - cl::values(clEnumValN(SplitByCategoryType::SBCT_ByModuleId, "module-id", - "one output module per translation unit marked with " - "\"module-id\" attribute"), + cl::values(clEnumValN(SplitByCategoryType::SBCT_ByAttribute, "attribute", + "one output module per unique value of the function " + "attribute named by --category-attribute"), clEnumValN(SplitByCategoryType::SBCT_ByKernel, "kernel", "one output module per kernel")), cl::cat(SplitCategory)); +static cl::opt<std::string> + CategoryAttribute("category-attribute", + cl::desc("Function attribute name to use when splitting " + "with -split-by-category=attribute"), + cl::value_desc("name"), cl::cat(SplitCategory)); + static cl::opt<bool> OutputAssembly{ "S", cl::desc("Write output as LLVM assembly"), cl::cat(SplitCategory)}; @@ -125,15 +131,16 @@ void writeModuleToFile(const Module &M, StringRef Path, bool OutputAssembly) { WriteBitcodeToFile(M, OS); } -/// EntryPointCategorizer is used for splitting by category either by module-id -/// or by kernels. It doesn't provide categories for functions other than -/// kernels. Categorizer computes a string key for the given Function and -/// records the association between the string key and an integer category. If a -/// string key is already belongs to some category than the corresponding -/// integer category is returned. +/// EntryPointCategorizer is used for splitting by category either by a named +/// function attribute or by kernels. It doesn't provide categories for +/// functions other than kernels. Categorizer computes a string key for the +/// given Function and records the association between the string key and an +/// integer category. If a string key already belongs to some category then the +/// corresponding integer category is returned. class EntryPointCategorizer { public: - EntryPointCategorizer(SplitByCategoryType Type) : Type(Type) {} + EntryPointCategorizer(SplitByCategoryType Type, StringRef AttributeName) + : Type(Type), AttributeName(AttributeName) {} EntryPointCategorizer() = delete; EntryPointCategorizer(EntryPointCategorizer &) = delete; @@ -163,16 +170,15 @@ class EntryPointCategorizer { return F.hasKernelCallingConv(); } - static SmallString<0> computeFunctionCategory(SplitByCategoryType Type, - const Function &F) { - static constexpr char ATTR_MODULE_ID[] = "module-id"; + SmallString<0> computeFunctionCategory(SplitByCategoryType Type, + const Function &F) { SmallString<0> Key; switch (Type) { case SplitByCategoryType::SBCT_ByKernel: Key = F.getName().str(); break; - case SplitByCategoryType::SBCT_ByModuleId: - Key = F.getFnAttribute(ATTR_MODULE_ID).getValueAsString().str(); + case SplitByCategoryType::SBCT_ByAttribute: + Key = F.getFnAttribute(AttributeName).getValueAsString().str(); break; default: llvm_unreachable("unexpected mode."); @@ -197,6 +203,7 @@ class EntryPointCategorizer { }; SplitByCategoryType Type; + std::string AttributeName; DenseMap<SmallString<0>, int, KeyInfo> StrKeyToID; }; @@ -209,6 +216,12 @@ void cleanupModule(Module &M) { } Error runSplitModuleByCategory(std::unique_ptr<Module> M) { + if (SplitByCategory == SplitByCategoryType::SBCT_ByAttribute && + CategoryAttribute.empty()) + return createStringError( + inconvertibleErrorCode(), + "-split-by-category=attribute requires --category-attribute=<name>"); + size_t OutputID = 0; auto PostSplitCallback = [&](std::unique_ptr<Module> MPart) -> Error { if (verifyModule(*MPart)) { @@ -228,7 +241,7 @@ Error runSplitModuleByCategory(std::unique_ptr<Module> M) { return Error::success(); }; - auto Categorizer = EntryPointCategorizer(SplitByCategory); + auto Categorizer = EntryPointCategorizer(SplitByCategory, CategoryAttribute); return splitModuleTransitiveFromEntryPoints(std::move(M), Categorizer, PostSplitCallback); } >From 39f9343ac0df15b2789075f192c58925cc5d4879 Mon Sep 17 00:00:00 2001 From: "Plyakhin, Yury" <[email protected]> Date: Sat, 9 May 2026 05:37:34 +0200 Subject: [PATCH 2/2] addressed feedback --- clang/test/Driver/clang-sycl-linker-test.cpp | 33 ++++---- .../clang-sycl-linker/ClangSYCLLinker.cpp | 84 +++++++++++-------- clang/tools/clang-sycl-linker/SYCLLinkOpts.td | 8 +- 3 files changed, 64 insertions(+), 61 deletions(-) diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp b/clang/test/Driver/clang-sycl-linker-test.cpp index 69596252efdf0..608d199805293 100644 --- a/clang/test/Driver/clang-sycl-linker-test.cpp +++ b/clang/test/Driver/clang-sycl-linker-test.cpp @@ -3,14 +3,11 @@ // REQUIRES: spirv-registered-target // // Test the dry run of a simple case to link two input files. -// The input has no SYCL kernels, so the default split mode ('source') produces -// a single device image via the no-entry-point fallback. // RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_1.bc // RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_2.bc // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o %t-spirv.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=SIMPLE-FO // SIMPLE-FO: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: source // SIMPLE-FO-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv // // Test that IMG_SPIRV image kind is set for non-AOT compilation. @@ -18,14 +15,12 @@ // IMAGE-KIND-SPIRV: kind spir-v // // Test the dry run of a simple case with device library files specified. -// No kernels in input; default split mode ('source') produces a single image. // RUN: mkdir -p %t.dir // RUN: touch %t.dir/lib1.bc // RUN: touch %t.dir/lib2.bc // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc --library-path=%t.dir --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 \ // RUN: | FileCheck %s --check-prefix=DEVLIBS // DEVLIBS: sycl-device-link: inputs: {{.*}}.bc libfiles: {{.*}}lib1.bc, {{.*}}lib2.bc output: [[LLVMLINKOUT:.*]].bc -// DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: source // DEVLIBS-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: a_0.spv // // Test a simple case with a random file (not bitcode) as input. @@ -43,12 +38,10 @@ // DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found // // Test AOT compilation for an Intel GPU. -// No kernels in input; default split mode ('source') produces a single image. // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc %t_2.bc -o %t-aot-gpu.out 2>&1 \ // RUN: --ocloc-options="-a -b" \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-GPU // AOT-INTEL-GPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: source // AOT-INTEL-GPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv // AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b {{.*}}-output [[SPIRVTRANSLATIONOUT]]_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv // @@ -57,12 +50,10 @@ // IMAGE-KIND-OBJECT: kind elf // // Test AOT compilation for an Intel CPU. -// No kernels in input; default split mode ('source') produces a single image. // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=graniterapids %t_1.bc %t_2.bc -o %t-aot-cpu.out 2>&1 \ // RUN: --opencl-aot-options="-a -b" \ // RUN: | FileCheck %s --check-prefix=AOT-INTEL-CPU // AOT-INTEL-CPU: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: source // AOT-INTEL-CPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: [[SPIRVTRANSLATIONOUT:.*]]_0.spv // AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b {{.*}}-o [[SPIRVTRANSLATIONOUT]]_0.out [[SPIRVTRANSLATIONOUT]]_0.spv // @@ -79,11 +70,12 @@ // RUN: | FileCheck %s --check-prefix=NOTARGET // NOTARGET: Target triple must be specified // -// Test the split mode ("none"): no extra splits are produced. -// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none %t_1.bc %t_2.bc -o %t-split-none.out 2>&1 \ +// Test the split mode ("none"): kernels from different TUs are not split into +// separate images. +// RUN: llvm-as %S/Inputs/SYCL/two-modules.ll -o %t-two-mod.bc +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none %t-two-mod.bc -o %t-split-none.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=SPLIT-NONE -// SPLIT-NONE: sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc -// SPLIT-NONE-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[LLVMLINKOUT]].bc, mode: none +// SPLIT-NONE: sycl-device-link: inputs: {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc // SPLIT-NONE-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv // SPLIT-NONE-NOT: LLVM backend: input: {{.*}}.bc, output: {{.*}}_1.spv // @@ -102,10 +94,13 @@ // RUN: | FileCheck %s --check-prefix=SPLIT-INVALID // SPLIT-INVALID: module-split-mode value isn't recognized: bogus // -// Test per-TU split: two kernels with different sycl-module-id values produce -// two device images. -// RUN: llvm-as %S/Inputs/SYCL/two-modules.ll -o %t-tu.bc -// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=source %t-tu.bc -o %t-src.out 2>&1 \ +// Test default split mode ('source'): no --module-split-mode flag needed. +// Two kernels with different sycl-module-id values produce two device images. +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t-two-mod.bc -o %t-src.out 2>&1 \ +// RUN: | FileCheck %s --check-prefix=SPLIT-SRC +// +// Test per-TU split ('source' explicitely provided) +// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=source %t-two-mod.bc -o %t-src.out 2>&1 \ // RUN: | FileCheck %s --check-prefix=SPLIT-SRC // SPLIT-SRC: sycl-device-link: inputs: {{.*}}.bc libfiles: output: [[LLVMLINKOUT:.*]].bc // SPLIT-SRC-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: [[S0:.*]].bc, [[S1:.*]].bc, mode: source @@ -117,5 +112,5 @@ // since only the kernel is an entry point. // RUN: llvm-as %S/Inputs/SYCL/external-fn.ll -o %t-ext.bc // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=source %t-ext.bc -o %t-ext.out 2>&1 \ -// RUN: | FileCheck %s --check-prefix=SPLIT-EXT-DEFAULT -// SPLIT-EXT-DEFAULT: sycl-module-split: input: {{.*}}.bc, output: [[S0:.*]].bc, mode: source +// RUN: | FileCheck %s --check-prefix=SPLIT-EXT-NO-ENTRY +// SPLIT-EXT-NO-ENTRY: sycl-module-split: input: {{.*}}.bc, output: [[S0:.*]].bc, mode: source diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index af2273dd33dbb..58fbd7706661d 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -513,6 +513,8 @@ static bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) { if (EmitOnlyKernelsAsEntryPoints) return false; // sycl_external functions carry the "sycl-module-id" attribute. + // This branch is not reachable while EmitOnlyKernelsAsEntryPoints is + // hardcoded to true (see TODO in runSYCLLink). return F.hasFnAttribute(AttrSYCLModuleId); } @@ -528,6 +530,12 @@ static SmallString<0> collectSymbols(const Module &M, return SymbolData; } +/// Functor passed to splitModuleTransitiveFromEntryPoints. For each input \p F, +/// returns a numeric group ID (if \p F is an entry point) determining which +/// device image it lands in, or std::nullopt (for non-entry-points). +/// SPLIT_PER_KERNEL \p Mode gives each kernel its own ID; +/// SPLIT_PER_TU \p Mode groups kernels by their "sycl-module-id" attribute +/// value. class EntryPointCategorizer { public: EntryPointCategorizer(IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints) @@ -562,34 +570,16 @@ class EntryPointCategorizer { /// Splits the fully linked device \p M into one bitcode file per device image /// according to \p Mode and returns the list of split images with their symbol -/// tables. -/// -/// For SPLIT_NONE, \p LinkedBitcodeFile is returned as-is. -/// For all other modes the module is split transitively from entry points; -/// each part is written to a fresh temporary bitcode file. +/// tables. The module is split transitively from entry points; each part is +/// written to a fresh temporary bitcode file. static Expected<SmallVector<SplitModule, 0>> splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile, IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints, const ArgList &Args) { - SmallVector<SplitModule, 0> SplitModules; - - if (Mode == IRSplitMode::SPLIT_NONE) { - SplitModules.push_back({SmallString<256>(LinkedBitcodeFile), - collectSymbols(*M, EmitOnlyKernelsAsEntryPoints)}); - return SplitModules; - } - - // splitModuleTransitiveFromEntryPoints requires at least one categorized - // entry point. Fall back to a single image if the module has none. - bool HasEntryPoint = llvm::any_of(M->functions(), [&](const Function &F) { - return isEntryPoint(F, EmitOnlyKernelsAsEntryPoints); - }); - if (!HasEntryPoint) { - SplitModules.push_back({SmallString<256>(LinkedBitcodeFile), - collectSymbols(*M, EmitOnlyKernelsAsEntryPoints)}); - return SplitModules; - } + assert(Mode != IRSplitMode::SPLIT_NONE && + "Any split method except None should be specified"); + SmallVector<SplitModule, 0> SplitModules; EntryPointCategorizer Categorizer(Mode, EmitOnlyKernelsAsEntryPoints); auto SplitCallback = [&](std::unique_ptr<Module> Part) -> Error { @@ -614,9 +604,29 @@ splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile, std::move(M), Categorizer, SplitCallback)) return Err; + if (Verbose || DryRun) { + SmallVector<StringRef> SplitFiles; + for (const SplitModule &SI : SplitModules) + SplitFiles.push_back(SI.ModuleFilePath); + errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: {2}\n", + LinkedBitcodeFile, llvm::join(SplitFiles, ", "), + splitModeToString(Mode)); + } + return SplitModules; } +/// Returns true if module splitting can be skipped: either \p Mode is +/// SPLIT_NONE, or \p M contains no entry points (nothing to split from). +static bool checkModuleSplitCanBeSkipped(IRSplitMode Mode, const Module &M, + bool EmitOnlyKernelsAsEntryPoints) { + if (Mode == IRSplitMode::SPLIT_NONE) + return true; + return !llvm::any_of(M.functions(), [&](const Function &F) { + return isEntryPoint(F, EmitOnlyKernelsAsEntryPoints); + }); +} + /// Performs the following steps: /// 1. Link input device code (user code and SYCL device library code). /// 2. Run SPIR-V code generation. @@ -647,20 +657,20 @@ Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { // can be called across device image boundaries. bool EmitOnlyKernelsAsEntryPoints = true; - // Split the linked module into one or more device images. - Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr = - splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, - EmitOnlyKernelsAsEntryPoints, Args); - if (!SplitModulesOrErr) - return SplitModulesOrErr.takeError(); - SmallVector<SplitModule, 0> &SplitModules = *SplitModulesOrErr; - if (Verbose) { - SmallVector<StringRef> SplitFiles; - for (const SplitModule &SI : SplitModules) - SplitFiles.push_back(SI.ModuleFilePath); - errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: {2}\n", - LinkedFile, llvm::join(SplitFiles, ", "), - splitModeToString(SplitMode)); + SmallVector<SplitModule, 0> SplitModules; + if (checkModuleSplitCanBeSkipped(SplitMode, *LinkedModule, + EmitOnlyKernelsAsEntryPoints)) { + SplitModules.push_back( + {SmallString<256>(LinkedFile), + collectSymbols(*LinkedModule, EmitOnlyKernelsAsEntryPoints)}); + } else { + Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr = + splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, + EmitOnlyKernelsAsEntryPoints, Args); + if (!SplitModulesOrErr) + return SplitModulesOrErr.takeError(); + + SplitModules = std::move(*SplitModulesOrErr); } bool IsAOTCompileNeeded = IsIntelOffloadArch( diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index c60b06573d8f3..c6587d8d10fbf 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -58,10 +58,8 @@ def opencl_aot_options_EQ : Joined<["--", "-"], "opencl-aot-options=">, def module_split_mode_EQ : Joined<["--", "-"], "module-split-mode=">, Flags<[LinkerOnlyOption]>, MetaVarName<"<mode>">, HelpText<"SYCL device code module split mode. Valid values: " - "'source' (default) emits one device image per translation unit " - "that contains at least one kernel (grouped by the 'sycl-module-id' " - "attribute); translation units containing only sycl_external " - "functions do not produce a device image, this behavior may change " - "in the future; " + "'source' (default) emits one device image per translation unit that contains " + "at least one kernel; translation units containing only sycl_external " + "functions do not produce a device image, this behavior may change in the future; " "'kernel' emits one device image per kernel function; " "'none' emits a single device image.">; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
