https://github.com/YuriPlyakhin updated 
https://github.com/llvm/llvm-project/pull/196435

>From de5f17e5602c93d3aa6814078ee216dcf3687ece Mon Sep 17 00:00:00 2001
From: "Plyakhin, Yury" <[email protected]>
Date: Wed, 6 May 2026 18:32:36 +0200
Subject: [PATCH 1/2] [clang-sycl-linker] Add per-translation-unit device code
 split mode

Adds `source` split mode to `clang-sycl-linker`, driven by the
`sycl-module-id` function attribute emitted by the SYCL frontend.
`source` is the default mode and groups kernels by the value of their
`sycl-module-id` attribute, emitting one device image per translation
unit. If the linked module contains no kernels, no device image is
emitted. `none` disables splitting and emits a single device image.
`kernel` emits one device image per kernel function.

The `EntryPointCategorizer` in `ClangSYCLLinker.cpp` is refactored into a
class (instead of a stateful lambda) to support both per-kernel and per-TU
modes cleanly.

`llvm-split`'s `-split-by-category=module-id` is renamed to
`-split-by-category=attribute` and the previously hardcoded `"module-id"`
attribute name is replaced by a required `--category-attribute=<name>` CLI
option. This decouples the tool from any specific attribute name. All
`SplitByCategory` tests are updated accordingly.

Co-Authored-By: Claude <[email protected]>
---
 clang/test/Driver/Inputs/SYCL/external-fn.ll  |  19 +++
 clang/test/Driver/Inputs/SYCL/two-modules.ll  |  25 ++++
 clang/test/Driver/clang-sycl-linker-test.cpp  |  32 ++++-
 .../clang-sycl-linker/ClangSYCLLinker.cpp     | 131 ++++++++++++------
 clang/tools/clang-sycl-linker/SYCLLinkOpts.td |  11 +-
 .../complex-indirect-call-chain1.ll           |   2 +-
 .../complex-indirect-call-chain2.ll           |   2 +-
 .../SplitByCategory/module-split-func-ptr.ll  |   2 +-
 .../SplitByCategory/split-by-source.ll        |   2 +-
 .../split-with-kernel-declarations.ll         |   2 +-
 llvm/tools/llvm-split/llvm-split.cpp          |  47 ++++---
 11 files changed, 205 insertions(+), 70 deletions(-)
 create mode 100644 clang/test/Driver/Inputs/SYCL/external-fn.ll
 create mode 100644 clang/test/Driver/Inputs/SYCL/two-modules.ll

diff --git a/clang/test/Driver/Inputs/SYCL/external-fn.ll 
b/clang/test/Driver/Inputs/SYCL/external-fn.ll
new file mode 100644
index 0000000000000..b6ec0de46bdad
--- /dev/null
+++ b/clang/test/Driver/Inputs/SYCL/external-fn.ll
@@ -0,0 +1,19 @@
+target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
+target triple = "spirv64"
+
+; A kernel from TU1 and a sycl_external function from TU2.
+
+define spir_func i32 @ext_fn(i32 %a) #1 {
+entry:
+  %r = add nsw i32 %a, 2
+  ret i32 %r
+}
+
+define spir_kernel void @k(ptr addrspace(1) %out) #0 {
+entry:
+  store i32 42, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
diff --git a/clang/test/Driver/Inputs/SYCL/two-modules.ll 
b/clang/test/Driver/Inputs/SYCL/two-modules.ll
new file mode 100644
index 0000000000000..d63f0e6f38726
--- /dev/null
+++ b/clang/test/Driver/Inputs/SYCL/two-modules.ll
@@ -0,0 +1,25 @@
+target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
+target triple = "spirv64"
+
+define spir_func i32 @helper(i32 %a) {
+entry:
+  %r = add nsw i32 %a, 1
+  ret i32 %r
+}
+
+define spir_kernel void @kernel_a(ptr addrspace(1) %out, i32 %a) #0 {
+entry:
+  %r = call spir_func i32 @helper(i32 %a)
+  store i32 %r, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+define spir_kernel void @kernel_b(ptr addrspace(1) %out, i32 %a) #1 {
+entry:
+  %r = call spir_func i32 @helper(i32 %a)
+  store i32 %r, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp 
b/clang/test/Driver/clang-sycl-linker-test.cpp
index cd99d4d47b1e1..69596252efdf0 100644
--- a/clang/test/Driver/clang-sycl-linker-test.cpp
+++ b/clang/test/Driver/clang-sycl-linker-test.cpp
@@ -3,13 +3,14 @@
 // REQUIRES: spirv-registered-target
 //
 // Test the dry run of a simple case to link two input files.
-// Also verifies the default split mode ("none").
+// The input has no SYCL kernels, so the default split mode ('source') produces
+// a single device image via the no-entry-point fallback.
 // RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_1.bc
 // RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_2.bc
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o 
%t-spirv.out 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=SIMPLE-FO
 // SIMPLE-FO:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc  libfiles:  
output: [[LLVMLINKOUT:.*]].bc
-// SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
+// SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: source
 // SIMPLE-FO-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
{{.*}}_0.spv
 //
 // Test that IMG_SPIRV image kind is set for non-AOT compilation.
@@ -17,13 +18,14 @@
 // IMAGE-KIND-SPIRV: kind            spir-v
 //
 // Test the dry run of a simple case with device library files specified.
+// No kernels in input; default split mode ('source') produces a single image.
 // RUN: mkdir -p %t.dir
 // RUN: touch %t.dir/lib1.bc
 // RUN: touch %t.dir/lib2.bc
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc 
--library-path=%t.dir --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=DEVLIBS
 // DEVLIBS:      sycl-device-link: inputs: {{.*}}.bc  libfiles: {{.*}}lib1.bc, 
{{.*}}lib2.bc  output: [[LLVMLINKOUT:.*]].bc
-// DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
+// DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: source
 // DEVLIBS-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: a_0.spv
 //
 // Test a simple case with a random file (not bitcode) as input.
@@ -41,11 +43,12 @@
 // DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found
 //
 // Test AOT compilation for an Intel GPU.
+// No kernels in input; default split mode ('source') produces a single image.
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc 
%t_2.bc -o %t-aot-gpu.out 2>&1 \
 // RUN:     --ocloc-options="-a -b" \
 // RUN:   | FileCheck %s --check-prefix=AOT-INTEL-GPU
 // AOT-INTEL-GPU:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc 
libfiles: output: [[LLVMLINKOUT:.*]].bc
-// AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
+// AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: source
 // AOT-INTEL-GPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
[[SPIRVTRANSLATIONOUT:.*]]_0.spv
 // AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b 
{{.*}}-output [[SPIRVTRANSLATIONOUT]]_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv
 //
@@ -54,11 +57,12 @@
 // IMAGE-KIND-OBJECT: kind            elf
 //
 // Test AOT compilation for an Intel CPU.
+// No kernels in input; default split mode ('source') produces a single image.
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=graniterapids 
%t_1.bc %t_2.bc -o %t-aot-cpu.out 2>&1 \
 // RUN:     --opencl-aot-options="-a -b" \
 // RUN:   | FileCheck %s --check-prefix=AOT-INTEL-CPU
 // AOT-INTEL-CPU:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc 
libfiles: output: [[LLVMLINKOUT:.*]].bc
-// AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
+// AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: source
 // AOT-INTEL-CPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
[[SPIRVTRANSLATIONOUT:.*]]_0.spv
 // AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b 
{{.*}}-o [[SPIRVTRANSLATIONOUT]]_0.out [[SPIRVTRANSLATIONOUT]]_0.spv
 //
@@ -97,3 +101,21 @@
 // RUN: not clang-sycl-linker --dry-run -triple=spirv64 
--module-split-mode=bogus %t_1.bc -o a.out 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=SPLIT-INVALID
 // SPLIT-INVALID: module-split-mode value isn't recognized: bogus
+//
+// Test per-TU split: two kernels with different sycl-module-id values produce
+// two device images.
+// RUN: llvm-as %S/Inputs/SYCL/two-modules.ll -o %t-tu.bc
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=source %t-tu.bc -o %t-src.out 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=SPLIT-SRC
+// SPLIT-SRC:      sycl-device-link: inputs: {{.*}}.bc  libfiles:  output: 
[[LLVMLINKOUT:.*]].bc
+// SPLIT-SRC-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[S0:.*]].bc, [[S1:.*]].bc, mode: source
+// SPLIT-SRC-NEXT: LLVM backend: input: [[S0]].bc, output: {{.*}}_0.spv
+// SPLIT-SRC-NEXT: LLVM backend: input: [[S1]].bc, output: {{.*}}_1.spv
+//
+// Test that sycl_external functions are not treated as entry points: a kernel
+// from TU1 and a sycl_external function from TU2 produce a single image,
+// since only the kernel is an entry point.
+// RUN: llvm-as %S/Inputs/SYCL/external-fn.ll -o %t-ext.bc
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=source %t-ext.bc -o %t-ext.out 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=SPLIT-EXT-DEFAULT
+// SPLIT-EXT-DEFAULT: sycl-module-split: input: {{.*}}.bc, output: 
[[S0:.*]].bc, mode: source
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp 
b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index 5a525d263427d..af2273dd33dbb 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -468,20 +468,36 @@ static Error runAOTCompile(StringRef InputFile, StringRef 
OutputFile,
   return createStringError(inconvertibleErrorCode(), "Unsupported arch");
 }
 
+static constexpr char AttrSYCLModuleId[] = "sycl-module-id";
+
 /// SYCL device code module split mode.
 enum class IRSplitMode {
+  SPLIT_PER_TU,     // one module per translation unit
   SPLIT_PER_KERNEL, // one module per kernel
   SPLIT_NONE        // no splitting
 };
 
-/// Parses the value of \p -module-split-mode.
+/// Parses the value of \p --module-split-mode.
 static std::optional<IRSplitMode> convertStringToSplitMode(StringRef S) {
   return StringSwitch<std::optional<IRSplitMode>>(S)
+      .Case("source", IRSplitMode::SPLIT_PER_TU)
       .Case("kernel", IRSplitMode::SPLIT_PER_KERNEL)
       .Case("none", IRSplitMode::SPLIT_NONE)
       .Default(std::nullopt);
 }
 
+static StringRef splitModeToString(IRSplitMode Mode) {
+  switch (Mode) {
+  case IRSplitMode::SPLIT_PER_TU:
+    return "source";
+  case IRSplitMode::SPLIT_PER_KERNEL:
+    return "kernel";
+  case IRSplitMode::SPLIT_NONE:
+    return "none";
+  }
+  llvm_unreachable("bad split mode");
+}
+
 /// Result of splitting a device module: the bitcode file path and the
 /// serialized symbol table for each device image.
 struct SplitModule {
@@ -489,63 +505,92 @@ struct SplitModule {
   SmallString<0> Symbols;
 };
 
-static bool isEntryPoint(const Function &F) {
-  return !F.isDeclaration() && F.hasKernelCallingConv();
+static bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) 
{
+  if (F.isDeclaration())
+    return false;
+  if (F.hasKernelCallingConv())
+    return true;
+  if (EmitOnlyKernelsAsEntryPoints)
+    return false;
+  // sycl_external functions carry the "sycl-module-id" attribute.
+  return F.hasFnAttribute(AttrSYCLModuleId);
 }
 
-/// Collect kernel names from \p M and serialize them into a symbol table.
-static SmallString<0> collectSymbols(const Module &M) {
-  SmallVector<StringRef> KernelNames;
+/// Collect entry point names from \p M and serialize them into a symbol table.
+static SmallString<0> collectSymbols(const Module &M,
+                                     bool EmitOnlyKernelsAsEntryPoints) {
+  SmallVector<StringRef> Names;
   for (const Function &F : M)
-    if (isEntryPoint(F))
-      KernelNames.push_back(F.getName());
+    if (isEntryPoint(F, EmitOnlyKernelsAsEntryPoints))
+      Names.push_back(F.getName());
   SmallString<0> SymbolData;
-  llvm::offloading::sycl::writeSymbolTable(KernelNames, SymbolData);
+  llvm::offloading::sycl::writeSymbolTable(Names, SymbolData);
   return SymbolData;
 }
 
+class EntryPointCategorizer {
+public:
+  EntryPointCategorizer(IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints)
+      : Mode(Mode), OnlyKernelsAreEntryPoints(EmitOnlyKernelsAsEntryPoints) {}
+
+  std::optional<int> operator()(const Function &F) {
+    if (!isEntryPoint(F, OnlyKernelsAreEntryPoints))
+      return std::nullopt;
+
+    std::string Key;
+    switch (Mode) {
+    case IRSplitMode::SPLIT_PER_KERNEL:
+      Key = F.getName().str();
+      break;
+    case IRSplitMode::SPLIT_PER_TU:
+      Key = F.getFnAttribute(AttrSYCLModuleId).getValueAsString().str();
+      break;
+    case IRSplitMode::SPLIT_NONE:
+      llvm_unreachable("categorizer not used for SPLIT_NONE");
+    }
+
+    auto [It, Inserted] =
+        StrToId.try_emplace(std::move(Key), static_cast<int>(StrToId.size()));
+    return It->second;
+  }
+
+private:
+  IRSplitMode Mode;
+  bool OnlyKernelsAreEntryPoints;
+  llvm::StringMap<int> StrToId;
+};
+
 /// Splits the fully linked device \p M into one bitcode file per device image
 /// according to \p Mode and returns the list of split images with their symbol
 /// tables.
 ///
 /// For SPLIT_NONE, \p LinkedBitcodeFile is returned as-is.
-/// For SPLIT_PER_KERNEL, the module is split into parts such that each part
-/// contains exactly one kernel entry point and its transitive dependencies;
+/// For all other modes the module is split transitively from entry points;
 /// each part is written to a fresh temporary bitcode file.
 static Expected<SmallVector<SplitModule, 0>>
 splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile,
-                IRSplitMode Mode, const ArgList &Args) {
+                IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints,
+                const ArgList &Args) {
   SmallVector<SplitModule, 0> SplitModules;
 
   if (Mode == IRSplitMode::SPLIT_NONE) {
-    SplitModules.push_back(
-        {SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)});
+    SplitModules.push_back({SmallString<256>(LinkedBitcodeFile),
+                            collectSymbols(*M, EmitOnlyKernelsAsEntryPoints)});
     return SplitModules;
   }
 
-  assert(Mode == IRSplitMode::SPLIT_PER_KERNEL);
-
-  // splitModuleTransitiveFromEntryPoints asserts that at least one entry point
-  // was categorized. If the linked module contains no kernel definitions at
-  // all, there is nothing to split; fall back to shipping the linked module
-  // as a single image.
-  bool HasKernel = llvm::any_of(M->functions(), isEntryPoint);
-  if (!HasKernel) {
-    SplitModules.push_back(
-        {SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)});
+  // splitModuleTransitiveFromEntryPoints requires at least one categorized
+  // entry point. Fall back to a single image if the module has none.
+  bool HasEntryPoint = llvm::any_of(M->functions(), [&](const Function &F) {
+    return isEntryPoint(F, EmitOnlyKernelsAsEntryPoints);
+  });
+  if (!HasEntryPoint) {
+    SplitModules.push_back({SmallString<256>(LinkedBitcodeFile),
+                            collectSymbols(*M, EmitOnlyKernelsAsEntryPoints)});
     return SplitModules;
   }
 
-  // Categorize each kernel function into its own group. Non-kernels and
-  // declarations return std::nullopt so they are pulled into whichever split
-  // transitively needs them.
-  int NextCategory = 0;
-  auto EntryPointCategorizer =
-      [&NextCategory](const Function &F) -> std::optional<int> {
-    if (!isEntryPoint(F))
-      return std::nullopt;
-    return NextCategory++;
-  };
+  EntryPointCategorizer Categorizer(Mode, EmitOnlyKernelsAsEntryPoints);
 
   auto SplitCallback = [&](std::unique_ptr<Module> Part) -> Error {
     Expected<StringRef> BitcodeFileOrErr =
@@ -560,12 +605,13 @@ splitDeviceCode(std::unique_ptr<Module> M, StringRef 
LinkedBitcodeFile,
     WriteBitcodeToFile(*Part, OS);
 
     SplitModules.push_back(
-        {SmallString<256>(*BitcodeFileOrErr), collectSymbols(*Part)});
+        {SmallString<256>(*BitcodeFileOrErr),
+         collectSymbols(*Part, EmitOnlyKernelsAsEntryPoints)});
     return Error::success();
   };
 
   if (Error Err = splitModuleTransitiveFromEntryPoints(
-          std::move(M), EntryPointCategorizer, SplitCallback))
+          std::move(M), Categorizer, SplitCallback))
     return Err;
 
   return SplitModules;
@@ -586,7 +632,7 @@ Error runSYCLLink(ArrayRef<std::string> Files, const 
ArgList &Args) {
   auto &[LinkedModule, LinkedFile] = *LinkedOrErr;
 
   // Determine the requested module split mode.
-  IRSplitMode SplitMode = IRSplitMode::SPLIT_NONE;
+  IRSplitMode SplitMode = IRSplitMode::SPLIT_PER_TU;
   if (Arg *A = Args.getLastArg(OPT_module_split_mode_EQ)) {
     std::optional<IRSplitMode> ModeOrNone =
         convertStringToSplitMode(A->getValue());
@@ -596,9 +642,15 @@ Error runSYCLLink(ArrayRef<std::string> Files, const 
ArgList &Args) {
     SplitMode = *ModeOrNone;
   }
 
+  // TODO: Expose this as a command-line option and default it to false when
+  // device-image dynamic linking is supported, so that sycl_external functions
+  // can be called across device image boundaries.
+  bool EmitOnlyKernelsAsEntryPoints = true;
+
   // Split the linked module into one or more device images.
   Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr =
-      splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, Args);
+      splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode,
+                      EmitOnlyKernelsAsEntryPoints, Args);
   if (!SplitModulesOrErr)
     return SplitModulesOrErr.takeError();
   SmallVector<SplitModule, 0> &SplitModules = *SplitModulesOrErr;
@@ -608,8 +660,7 @@ Error runSYCLLink(ArrayRef<std::string> Files, const 
ArgList &Args) {
       SplitFiles.push_back(SI.ModuleFilePath);
     errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: 
{2}\n",
                       LinkedFile, llvm::join(SplitFiles, ", "),
-                      SplitMode == IRSplitMode::SPLIT_PER_KERNEL ? "kernel"
-                                                                 : "none");
+                      splitModeToString(SplitMode));
   }
 
   bool IsAOTCompileNeeded = IsIntelOffloadArch(
diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td 
b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
index 740c5a4783ac9..c60b06573d8f3 100644
--- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
+++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
@@ -57,6 +57,11 @@ def opencl_aot_options_EQ : Joined<["--", "-"], 
"opencl-aot-options=">,
 
 def module_split_mode_EQ : Joined<["--", "-"], "module-split-mode=">,
   Flags<[LinkerOnlyOption]>, MetaVarName<"<mode>">,
-  HelpText<"SYCL device code module split mode. Valid values: 'none' (default) 
"
-           "emits a single device image; 'kernel' emits one device image per "
-           "kernel function.">;
+  HelpText<"SYCL device code module split mode. Valid values: "
+           "'source' (default) emits one device image per translation unit "
+           "that contains at least one kernel (grouped by the 'sycl-module-id' 
"
+           "attribute); translation units containing only sycl_external "
+           "functions do not produce a device image, this behavior may change "
+           "in the future; "
+           "'kernel' emits one device image per kernel function; "
+           "'none' emits a single device image.">;
diff --git 
a/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain1.ll 
b/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain1.ll
index 80123d4dd8fb7..50e08cc093d83 100644
--- a/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain1.ll
+++ b/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain1.ll
@@ -1,7 +1,7 @@
 ; Check that Module splitting can trace through more complex call stacks
 ; involving several nested indirect calls.
 
-; RUN: llvm-split -split-by-category=module-id -S < %s -o %t
+; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id 
-S < %s -o %t
 ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
 ; RUN:     --implicit-check-not @foo --implicit-check-not @kernel_A \
 ; RUN:     --implicit-check-not @kernel_B --implicit-check-not @baz
diff --git 
a/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain2.ll 
b/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain2.ll
index 0c80602f99eef..aa84c5fbf904a 100644
--- a/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain2.ll
+++ b/llvm/test/tools/llvm-split/SplitByCategory/complex-indirect-call-chain2.ll
@@ -1,6 +1,6 @@
 ; Check that Module splitting can trace indirect calls through signatures.
 
-; RUN: llvm-split -split-by-category=module-id -S < %s -o %t
+; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id 
-S < %s -o %t
 ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK0 \
 ; RUN:     --implicit-check-not @kernel_A --implicit-check-not @bbb
 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix CHECK1 \
diff --git 
a/llvm/test/tools/llvm-split/SplitByCategory/module-split-func-ptr.ll 
b/llvm/test/tools/llvm-split/SplitByCategory/module-split-func-ptr.ll
index 316500a4c7611..ee263fc38a893 100644
--- a/llvm/test/tools/llvm-split/SplitByCategory/module-split-func-ptr.ll
+++ b/llvm/test/tools/llvm-split/SplitByCategory/module-split-func-ptr.ll
@@ -1,7 +1,7 @@
 ; This test checks that Module splitting can properly perform device code 
split by tracking
 ; all uses of functions (not only direct calls).
 
-; RUN: llvm-split -split-by-category=module-id -S < %s -o %t
+; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id 
-S < %s -o %t
 ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix=CHECK-IR0
 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefix=CHECK-IR1
 
diff --git a/llvm/test/tools/llvm-split/SplitByCategory/split-by-source.ll 
b/llvm/test/tools/llvm-split/SplitByCategory/split-by-source.ll
index 54485b7b7f348..dc0cc292f50fe 100644
--- a/llvm/test/tools/llvm-split/SplitByCategory/split-by-source.ll
+++ b/llvm/test/tools/llvm-split/SplitByCategory/split-by-source.ll
@@ -1,7 +1,7 @@
 ; Test checks that kernels are being split by attached module-id metadata and
 ; used functions are being moved with kernels that use them.
 
-; RUN: llvm-split -split-by-category=module-id -S < %s -o %t
+; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id 
-S < %s -o %t
 ; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-TU0,CHECK
 ; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-TU1,CHECK
 
diff --git 
a/llvm/test/tools/llvm-split/SplitByCategory/split-with-kernel-declarations.ll 
b/llvm/test/tools/llvm-split/SplitByCategory/split-with-kernel-declarations.ll
index 0c1bd8b5c5fba..59a7a95761d9d 100644
--- 
a/llvm/test/tools/llvm-split/SplitByCategory/split-with-kernel-declarations.ll
+++ 
b/llvm/test/tools/llvm-split/SplitByCategory/split-with-kernel-declarations.ll
@@ -1,6 +1,6 @@
 ; The test checks that Module splitting does not treat declarations as entry 
points.
 
-; RUN: llvm-split -split-by-category=module-id -S < %s -o %t1
+; RUN: llvm-split -split-by-category=attribute --category-attribute=module-id 
-S < %s -o %t1
 ; RUN: FileCheck %s -input-file=%t1_0.ll --check-prefix CHECK-MODULE-ID0
 ; RUN: FileCheck %s -input-file=%t1_1.ll --check-prefix CHECK-MODULE-ID1
 
diff --git a/llvm/tools/llvm-split/llvm-split.cpp 
b/llvm/tools/llvm-split/llvm-split.cpp
index a987b8c1b3eb4..68812ae7158b4 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -78,7 +78,7 @@ static cl::opt<std::string>
          cl::value_desc("cpu"), cl::cat(SplitCategory));
 
 enum class SplitByCategoryType {
-  SBCT_ByModuleId,
+  SBCT_ByAttribute,
   SBCT_ByKernel,
   SBCT_None,
 };
@@ -88,13 +88,19 @@ static cl::opt<SplitByCategoryType> SplitByCategory(
     cl::desc("Split by category. If present, splitting by category is used "
              "with the specified categorization type."),
     cl::Optional, cl::init(SplitByCategoryType::SBCT_None),
-    cl::values(clEnumValN(SplitByCategoryType::SBCT_ByModuleId, "module-id",
-                          "one output module per translation unit marked with "
-                          "\"module-id\" attribute"),
+    cl::values(clEnumValN(SplitByCategoryType::SBCT_ByAttribute, "attribute",
+                          "one output module per unique value of the function "
+                          "attribute named by --category-attribute"),
                clEnumValN(SplitByCategoryType::SBCT_ByKernel, "kernel",
                           "one output module per kernel")),
     cl::cat(SplitCategory));
 
+static cl::opt<std::string>
+    CategoryAttribute("category-attribute",
+                      cl::desc("Function attribute name to use when splitting "
+                               "with -split-by-category=attribute"),
+                      cl::value_desc("name"), cl::cat(SplitCategory));
+
 static cl::opt<bool> OutputAssembly{
     "S", cl::desc("Write output as LLVM assembly"), cl::cat(SplitCategory)};
 
@@ -125,15 +131,16 @@ void writeModuleToFile(const Module &M, StringRef Path, 
bool OutputAssembly) {
     WriteBitcodeToFile(M, OS);
 }
 
-/// EntryPointCategorizer is used for splitting by category either by module-id
-/// or by kernels. It doesn't provide categories for functions other than
-/// kernels. Categorizer computes a string key for the given Function and
-/// records the association between the string key and an integer category. If 
a
-/// string key is already belongs to some category than the corresponding
-/// integer category is returned.
+/// EntryPointCategorizer is used for splitting by category either by a named
+/// function attribute or by kernels. It doesn't provide categories for
+/// functions other than kernels. Categorizer computes a string key for the
+/// given Function and records the association between the string key and an
+/// integer category. If a string key already belongs to some category then the
+/// corresponding integer category is returned.
 class EntryPointCategorizer {
 public:
-  EntryPointCategorizer(SplitByCategoryType Type) : Type(Type) {}
+  EntryPointCategorizer(SplitByCategoryType Type, StringRef AttributeName)
+      : Type(Type), AttributeName(AttributeName) {}
 
   EntryPointCategorizer() = delete;
   EntryPointCategorizer(EntryPointCategorizer &) = delete;
@@ -163,16 +170,15 @@ class EntryPointCategorizer {
     return F.hasKernelCallingConv();
   }
 
-  static SmallString<0> computeFunctionCategory(SplitByCategoryType Type,
-                                                const Function &F) {
-    static constexpr char ATTR_MODULE_ID[] = "module-id";
+  SmallString<0> computeFunctionCategory(SplitByCategoryType Type,
+                                         const Function &F) {
     SmallString<0> Key;
     switch (Type) {
     case SplitByCategoryType::SBCT_ByKernel:
       Key = F.getName().str();
       break;
-    case SplitByCategoryType::SBCT_ByModuleId:
-      Key = F.getFnAttribute(ATTR_MODULE_ID).getValueAsString().str();
+    case SplitByCategoryType::SBCT_ByAttribute:
+      Key = F.getFnAttribute(AttributeName).getValueAsString().str();
       break;
     default:
       llvm_unreachable("unexpected mode.");
@@ -197,6 +203,7 @@ class EntryPointCategorizer {
   };
 
   SplitByCategoryType Type;
+  std::string AttributeName;
   DenseMap<SmallString<0>, int, KeyInfo> StrKeyToID;
 };
 
@@ -209,6 +216,12 @@ void cleanupModule(Module &M) {
 }
 
 Error runSplitModuleByCategory(std::unique_ptr<Module> M) {
+  if (SplitByCategory == SplitByCategoryType::SBCT_ByAttribute &&
+      CategoryAttribute.empty())
+    return createStringError(
+        inconvertibleErrorCode(),
+        "-split-by-category=attribute requires --category-attribute=<name>");
+
   size_t OutputID = 0;
   auto PostSplitCallback = [&](std::unique_ptr<Module> MPart) -> Error {
     if (verifyModule(*MPart)) {
@@ -228,7 +241,7 @@ Error runSplitModuleByCategory(std::unique_ptr<Module> M) {
     return Error::success();
   };
 
-  auto Categorizer = EntryPointCategorizer(SplitByCategory);
+  auto Categorizer = EntryPointCategorizer(SplitByCategory, CategoryAttribute);
   return splitModuleTransitiveFromEntryPoints(std::move(M), Categorizer,
                                               PostSplitCallback);
 }

>From 39f9343ac0df15b2789075f192c58925cc5d4879 Mon Sep 17 00:00:00 2001
From: "Plyakhin, Yury" <[email protected]>
Date: Sat, 9 May 2026 05:37:34 +0200
Subject: [PATCH 2/2] addressed feedback

---
 clang/test/Driver/clang-sycl-linker-test.cpp  | 33 ++++----
 .../clang-sycl-linker/ClangSYCLLinker.cpp     | 84 +++++++++++--------
 clang/tools/clang-sycl-linker/SYCLLinkOpts.td |  8 +-
 3 files changed, 64 insertions(+), 61 deletions(-)

diff --git a/clang/test/Driver/clang-sycl-linker-test.cpp 
b/clang/test/Driver/clang-sycl-linker-test.cpp
index 69596252efdf0..608d199805293 100644
--- a/clang/test/Driver/clang-sycl-linker-test.cpp
+++ b/clang/test/Driver/clang-sycl-linker-test.cpp
@@ -3,14 +3,11 @@
 // REQUIRES: spirv-registered-target
 //
 // Test the dry run of a simple case to link two input files.
-// The input has no SYCL kernels, so the default split mode ('source') produces
-// a single device image via the no-entry-point fallback.
 // RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_1.bc
 // RUN: %clangxx -emit-llvm -c -target spirv64 %s -o %t_2.bc
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc -o 
%t-spirv.out 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=SIMPLE-FO
 // SIMPLE-FO:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc  libfiles:  
output: [[LLVMLINKOUT:.*]].bc
-// SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: source
 // SIMPLE-FO-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
{{.*}}_0.spv
 //
 // Test that IMG_SPIRV image kind is set for non-AOT compilation.
@@ -18,14 +15,12 @@
 // IMAGE-KIND-SPIRV: kind            spir-v
 //
 // Test the dry run of a simple case with device library files specified.
-// No kernels in input; default split mode ('source') produces a single image.
 // RUN: mkdir -p %t.dir
 // RUN: touch %t.dir/lib1.bc
 // RUN: touch %t.dir/lib2.bc
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t_1.bc %t_2.bc 
--library-path=%t.dir --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=DEVLIBS
 // DEVLIBS:      sycl-device-link: inputs: {{.*}}.bc  libfiles: {{.*}}lib1.bc, 
{{.*}}lib2.bc  output: [[LLVMLINKOUT:.*]].bc
-// DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: source
 // DEVLIBS-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: a_0.spv
 //
 // Test a simple case with a random file (not bitcode) as input.
@@ -43,12 +38,10 @@
 // DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found
 //
 // Test AOT compilation for an Intel GPU.
-// No kernels in input; default split mode ('source') produces a single image.
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 %t_1.bc 
%t_2.bc -o %t-aot-gpu.out 2>&1 \
 // RUN:     --ocloc-options="-a -b" \
 // RUN:   | FileCheck %s --check-prefix=AOT-INTEL-GPU
 // AOT-INTEL-GPU:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc 
libfiles: output: [[LLVMLINKOUT:.*]].bc
-// AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: source
 // AOT-INTEL-GPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
[[SPIRVTRANSLATIONOUT:.*]]_0.spv
 // AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b 
{{.*}}-output [[SPIRVTRANSLATIONOUT]]_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv
 //
@@ -57,12 +50,10 @@
 // IMAGE-KIND-OBJECT: kind            elf
 //
 // Test AOT compilation for an Intel CPU.
-// No kernels in input; default split mode ('source') produces a single image.
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=graniterapids 
%t_1.bc %t_2.bc -o %t-aot-cpu.out 2>&1 \
 // RUN:     --opencl-aot-options="-a -b" \
 // RUN:   | FileCheck %s --check-prefix=AOT-INTEL-CPU
 // AOT-INTEL-CPU:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc 
libfiles: output: [[LLVMLINKOUT:.*]].bc
-// AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: source
 // AOT-INTEL-CPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
[[SPIRVTRANSLATIONOUT:.*]]_0.spv
 // AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b 
{{.*}}-o [[SPIRVTRANSLATIONOUT]]_0.out [[SPIRVTRANSLATIONOUT]]_0.spv
 //
@@ -79,11 +70,12 @@
 // RUN: | FileCheck %s --check-prefix=NOTARGET
 // NOTARGET: Target triple must be specified
 //
-// Test the split mode ("none"): no extra splits are produced.
-// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=none %t_1.bc %t_2.bc -o %t-split-none.out 2>&1 \
+// Test the split mode ("none"): kernels from different TUs are not split into
+// separate images.
+// RUN: llvm-as %S/Inputs/SYCL/two-modules.ll -o %t-two-mod.bc
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=none %t-two-mod.bc -o %t-split-none.out 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=SPLIT-NONE
-// SPLIT-NONE:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc  libfiles:  
output: [[LLVMLINKOUT:.*]].bc
-// SPLIT-NONE-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
+// SPLIT-NONE:      sycl-device-link: inputs: {{.*}}.bc  libfiles:  output: 
[[LLVMLINKOUT:.*]].bc
 // SPLIT-NONE-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
{{.*}}_0.spv
 // SPLIT-NONE-NOT:  LLVM backend: input: {{.*}}.bc, output: {{.*}}_1.spv
 //
@@ -102,10 +94,13 @@
 // RUN:   | FileCheck %s --check-prefix=SPLIT-INVALID
 // SPLIT-INVALID: module-split-mode value isn't recognized: bogus
 //
-// Test per-TU split: two kernels with different sycl-module-id values produce
-// two device images.
-// RUN: llvm-as %S/Inputs/SYCL/two-modules.ll -o %t-tu.bc
-// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=source %t-tu.bc -o %t-src.out 2>&1 \
+// Test default split mode ('source'): no --module-split-mode flag needed.
+// Two kernels with different sycl-module-id values produce two device images.
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t-two-mod.bc -o 
%t-src.out 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=SPLIT-SRC
+//
+// Test per-TU split ('source' explicitely provided)
+// RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=source %t-two-mod.bc -o %t-src.out 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=SPLIT-SRC
 // SPLIT-SRC:      sycl-device-link: inputs: {{.*}}.bc  libfiles:  output: 
[[LLVMLINKOUT:.*]].bc
 // SPLIT-SRC-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[S0:.*]].bc, [[S1:.*]].bc, mode: source
@@ -117,5 +112,5 @@
 // since only the kernel is an entry point.
 // RUN: llvm-as %S/Inputs/SYCL/external-fn.ll -o %t-ext.bc
 // RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=source %t-ext.bc -o %t-ext.out 2>&1 \
-// RUN:   | FileCheck %s --check-prefix=SPLIT-EXT-DEFAULT
-// SPLIT-EXT-DEFAULT: sycl-module-split: input: {{.*}}.bc, output: 
[[S0:.*]].bc, mode: source
+// RUN:   | FileCheck %s --check-prefix=SPLIT-EXT-NO-ENTRY
+// SPLIT-EXT-NO-ENTRY: sycl-module-split: input: {{.*}}.bc, output: 
[[S0:.*]].bc, mode: source
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp 
b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index af2273dd33dbb..58fbd7706661d 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -513,6 +513,8 @@ static bool isEntryPoint(const Function &F, bool 
EmitOnlyKernelsAsEntryPoints) {
   if (EmitOnlyKernelsAsEntryPoints)
     return false;
   // sycl_external functions carry the "sycl-module-id" attribute.
+  // This branch is not reachable while EmitOnlyKernelsAsEntryPoints is
+  // hardcoded to true (see TODO in runSYCLLink).
   return F.hasFnAttribute(AttrSYCLModuleId);
 }
 
@@ -528,6 +530,12 @@ static SmallString<0> collectSymbols(const Module &M,
   return SymbolData;
 }
 
+/// Functor passed to splitModuleTransitiveFromEntryPoints. For each input \p 
F,
+/// returns a numeric group ID (if \p F is an entry point) determining which
+/// device image it lands in, or std::nullopt (for non-entry-points).
+/// SPLIT_PER_KERNEL \p Mode gives each kernel its own ID;
+/// SPLIT_PER_TU \p Mode groups kernels by their "sycl-module-id" attribute
+/// value.
 class EntryPointCategorizer {
 public:
   EntryPointCategorizer(IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints)
@@ -562,34 +570,16 @@ class EntryPointCategorizer {
 
 /// Splits the fully linked device \p M into one bitcode file per device image
 /// according to \p Mode and returns the list of split images with their symbol
-/// tables.
-///
-/// For SPLIT_NONE, \p LinkedBitcodeFile is returned as-is.
-/// For all other modes the module is split transitively from entry points;
-/// each part is written to a fresh temporary bitcode file.
+/// tables. The module is split transitively from entry points; each part is
+/// written to a fresh temporary bitcode file.
 static Expected<SmallVector<SplitModule, 0>>
 splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile,
                 IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints,
                 const ArgList &Args) {
-  SmallVector<SplitModule, 0> SplitModules;
-
-  if (Mode == IRSplitMode::SPLIT_NONE) {
-    SplitModules.push_back({SmallString<256>(LinkedBitcodeFile),
-                            collectSymbols(*M, EmitOnlyKernelsAsEntryPoints)});
-    return SplitModules;
-  }
-
-  // splitModuleTransitiveFromEntryPoints requires at least one categorized
-  // entry point. Fall back to a single image if the module has none.
-  bool HasEntryPoint = llvm::any_of(M->functions(), [&](const Function &F) {
-    return isEntryPoint(F, EmitOnlyKernelsAsEntryPoints);
-  });
-  if (!HasEntryPoint) {
-    SplitModules.push_back({SmallString<256>(LinkedBitcodeFile),
-                            collectSymbols(*M, EmitOnlyKernelsAsEntryPoints)});
-    return SplitModules;
-  }
+  assert(Mode != IRSplitMode::SPLIT_NONE &&
+         "Any split method except None should be specified");
 
+  SmallVector<SplitModule, 0> SplitModules;
   EntryPointCategorizer Categorizer(Mode, EmitOnlyKernelsAsEntryPoints);
 
   auto SplitCallback = [&](std::unique_ptr<Module> Part) -> Error {
@@ -614,9 +604,29 @@ splitDeviceCode(std::unique_ptr<Module> M, StringRef 
LinkedBitcodeFile,
           std::move(M), Categorizer, SplitCallback))
     return Err;
 
+  if (Verbose || DryRun) {
+    SmallVector<StringRef> SplitFiles;
+    for (const SplitModule &SI : SplitModules)
+      SplitFiles.push_back(SI.ModuleFilePath);
+    errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: 
{2}\n",
+                      LinkedBitcodeFile, llvm::join(SplitFiles, ", "),
+                      splitModeToString(Mode));
+  }
+
   return SplitModules;
 }
 
+/// Returns true if module splitting can be skipped: either \p Mode is
+/// SPLIT_NONE, or \p M contains no entry points (nothing to split from).
+static bool checkModuleSplitCanBeSkipped(IRSplitMode Mode, const Module &M,
+                                         bool EmitOnlyKernelsAsEntryPoints) {
+  if (Mode == IRSplitMode::SPLIT_NONE)
+    return true;
+  return !llvm::any_of(M.functions(), [&](const Function &F) {
+    return isEntryPoint(F, EmitOnlyKernelsAsEntryPoints);
+  });
+}
+
 /// Performs the following steps:
 /// 1. Link input device code (user code and SYCL device library code).
 /// 2. Run SPIR-V code generation.
@@ -647,20 +657,20 @@ Error runSYCLLink(ArrayRef<std::string> Files, const 
ArgList &Args) {
   // can be called across device image boundaries.
   bool EmitOnlyKernelsAsEntryPoints = true;
 
-  // Split the linked module into one or more device images.
-  Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr =
-      splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode,
-                      EmitOnlyKernelsAsEntryPoints, Args);
-  if (!SplitModulesOrErr)
-    return SplitModulesOrErr.takeError();
-  SmallVector<SplitModule, 0> &SplitModules = *SplitModulesOrErr;
-  if (Verbose) {
-    SmallVector<StringRef> SplitFiles;
-    for (const SplitModule &SI : SplitModules)
-      SplitFiles.push_back(SI.ModuleFilePath);
-    errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: 
{2}\n",
-                      LinkedFile, llvm::join(SplitFiles, ", "),
-                      splitModeToString(SplitMode));
+  SmallVector<SplitModule, 0> SplitModules;
+  if (checkModuleSplitCanBeSkipped(SplitMode, *LinkedModule,
+                                   EmitOnlyKernelsAsEntryPoints)) {
+    SplitModules.push_back(
+        {SmallString<256>(LinkedFile),
+         collectSymbols(*LinkedModule, EmitOnlyKernelsAsEntryPoints)});
+  } else {
+    Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr =
+        splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode,
+                        EmitOnlyKernelsAsEntryPoints, Args);
+    if (!SplitModulesOrErr)
+      return SplitModulesOrErr.takeError();
+
+    SplitModules = std::move(*SplitModulesOrErr);
   }
 
   bool IsAOTCompileNeeded = IsIntelOffloadArch(
diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td 
b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
index c60b06573d8f3..c6587d8d10fbf 100644
--- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
+++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
@@ -58,10 +58,8 @@ def opencl_aot_options_EQ : Joined<["--", "-"], 
"opencl-aot-options=">,
 def module_split_mode_EQ : Joined<["--", "-"], "module-split-mode=">,
   Flags<[LinkerOnlyOption]>, MetaVarName<"<mode>">,
   HelpText<"SYCL device code module split mode. Valid values: "
-           "'source' (default) emits one device image per translation unit "
-           "that contains at least one kernel (grouped by the 'sycl-module-id' 
"
-           "attribute); translation units containing only sycl_external "
-           "functions do not produce a device image, this behavior may change "
-           "in the future; "
+           "'source' (default) emits one device image per translation unit 
that contains "
+           "at least one kernel; translation units containing only 
sycl_external "
+           "functions do not produce a device image, this behavior may change 
in the future; "
            "'kernel' emits one device image per kernel function; "
            "'none' emits a single device image.">;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to