https://github.com/YuriPlyakhin updated 
https://github.com/llvm/llvm-project/pull/197571

>From 878b84984b21d97e4984f64cc4b878f75aa96197 Mon Sep 17 00:00:00 2001
From: "Plyakhin, Yury" <[email protected]>
Date: Wed, 13 May 2026 02:36:37 +0200
Subject: [PATCH] [clang-sycl-linker] Add per-translation-unit device code
 split mode

Add SPLIT_PER_TU mode that groups kernels by their "sycl-module-id"
attribute value (i.e., by source translation unit). Make it the default
split mode, replacing SPLIT_NONE.

Also fix a potential buffer invalidation bug in sycl::writeSymbolTable
where appending symbol names could reallocate the output buffer while
pointers into it were still live.

Co-Authored-By: Claude

clean up
---
 .../Tooling/clang-sycl-linker-split-mode.ll   |  65 ++++--
 clang/test/Tooling/clang-sycl-linker.ll       |  13 +-
 .../clang-sycl-linker/ClangSYCLLinker.cpp     | 188 ++++++++++++------
 clang/tools/clang-sycl-linker/SYCLLinkOpts.td |   8 +-
 llvm/lib/Frontend/Offloading/Utility.cpp      |   8 +-
 5 files changed, 187 insertions(+), 95 deletions(-)

diff --git a/clang/test/Tooling/clang-sycl-linker-split-mode.ll 
b/clang/test/Tooling/clang-sycl-linker-split-mode.ll
index 2b4b1cee4e171..7402b50549239 100644
--- a/clang/test/Tooling/clang-sycl-linker-split-mode.ll
+++ b/clang/test/Tooling/clang-sycl-linker-split-mode.ll
@@ -9,43 +9,70 @@
 ; RUN:   | FileCheck %s --check-prefix=SPLIT-INVALID
 ; SPLIT-INVALID: module-split-mode value isn't recognized: bogus
 ;
-; Test the split mode ("none"): no extra splits are produced.
+; Test the split mode ("none"): kernels from different TUs are not split into 
separate images.
 ; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none 
%t.bc -o %t-none.out 2>&1 \
 ; RUN:   | FileCheck %s --check-prefix=SPLIT-NONE
 ; SPLIT-NONE:      sycl-device-link: inputs: {{.*}}.bc  libfiles:  output: 
[[LLVMLINKOUT:.*]].bc
-; SPLIT-NONE-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
 ; SPLIT-NONE-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
{{.*}}_0.spv
-; SPLIT-NONE-NOT:  LLVM backend: input: {{.*}}.bc, output: {{.*}}_1.spv
+; SPLIT-NONE-NOT:  {{.+}}
 ;
-; Test per-kernel split: a module with two SPIR_KERNEL functions produces two
-; device images.
+; Test the split mode ("kernel"): each SPIR_KERNEL function produces its own 
device image.
 ; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=kernel %t.bc -o %t-split-kernel.out 2>&1 \
 ; RUN:   | FileCheck %s --check-prefix=SPLIT-KERNEL
 ; SPLIT-KERNEL:      sycl-device-link: inputs: {{.*}}.bc  libfiles:  output: 
[[LLVMLINKOUT:.*]].bc
-; SPLIT-KERNEL-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[SPLIT0:.*]].bc, [[SPLIT1:.*]].bc, mode: kernel
+; SPLIT-KERNEL-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, mode: kernel
+; SPLIT-KERNEL-NEXT: [[SPLIT0:.*]].bc [kernel_c ]
+; SPLIT-KERNEL-NEXT: [[SPLIT1:.*]].bc [kernel_b ]
+; SPLIT-KERNEL-NEXT: [[SPLIT2:.*]].bc [kernel_a ]
 ; SPLIT-KERNEL-NEXT: LLVM backend: input: [[SPLIT0]].bc, output: {{.*}}_0.spv
 ; SPLIT-KERNEL-NEXT: LLVM backend: input: [[SPLIT1]].bc, output: {{.*}}_1.spv
+; SPLIT-KERNEL-NEXT: LLVM backend: input: [[SPLIT2]].bc, output: {{.*}}_2.spv
+;
+; Test default split mode ('source'): no --module-split-mode flag needed.
+; Two kernels with different sycl-module-id values produce two device images.
+; sycl_external function is not treated as entry point and doesn't produce a 
separate image
+; despite having a different sycl-module-id.
+; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t.bc -o %t-src.out 2>&1 
\
+; RUN:   | FileCheck %s --check-prefix=SPLIT-SRC
+;
+; Test per-TU split ('source' explicitly provided)
+; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 
--module-split-mode=source %t.bc -o %t-src.out 2>&1 \
+; RUN:   | FileCheck %s --check-prefix=SPLIT-SRC
+; SPLIT-SRC:      sycl-device-link: inputs: {{.*}}.bc  libfiles:  output: 
[[LLVMLINKOUT:.*]].bc
+; SPLIT-SRC-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, mode: source
+; SPLIT-SRC-NEXT: [[S0:.*]].bc [kernel_b kernel_c ]
+; SPLIT-SRC-NEXT: [[S1:.*]].bc [kernel_a ]
+; SPLIT-SRC-NEXT: LLVM backend: input: [[S0]].bc, output: {{.*}}_0.spv
+; SPLIT-SRC-NEXT: LLVM backend: input: [[S1]].bc, output: {{.*}}_1.spv
+; SPLIT-SRC-NOT:  {{.+}}
 
 target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
 target triple = "spirv64"
 
-define spir_func i32 @helper_shared(i32 %a) {
-entry:
-  %r = add nsw i32 %a, 1
-  ret i32 %r
+define spir_func i32 @helper() {
+  ret i32 0
+}
+
+define spir_kernel void @kernel_a() #0 {
+  %r = call spir_func i32 @helper()
+  ret void
 }
 
-define spir_kernel void @kernel_a(ptr addrspace(1) %out, i32 %a) {
-entry:
-  %r = tail call spir_func i32 @helper_shared(i32 %a)
-  store i32 %r, ptr addrspace(1) %out, align 4
+define spir_kernel void @kernel_b() #1 {
+  %r = call spir_func i32 @helper()
   ret void
 }
 
-define spir_kernel void @kernel_b(ptr addrspace(1) %out, i32 %a, i32 %b) {
-entry:
-  %h = tail call spir_func i32 @helper_shared(i32 %a)
-  %r = mul nsw i32 %h, %b
-  store i32 %r, ptr addrspace(1) %out, align 4
+define spir_kernel void @kernel_c() #1 {
+  %r = call spir_func i32 @helper()
   ret void
 }
+
+define spir_func i32 @ext_fn() #2 {
+  %r = call spir_func i32 @helper()
+  ret i32 0
+}
+
+attributes #0 = { "sycl-module-id"="TU1.cpp" }
+attributes #1 = { "sycl-module-id"="TU2.cpp" }
+attributes #2 = { "sycl-module-id"="TU3.cpp" }
diff --git a/clang/test/Tooling/clang-sycl-linker.ll 
b/clang/test/Tooling/clang-sycl-linker.ll
index cf0fb33d1bc06..958dc031b6899 100644
--- a/clang/test/Tooling/clang-sycl-linker.ll
+++ b/clang/test/Tooling/clang-sycl-linker.ll
@@ -7,11 +7,11 @@
 ; RUN: llvm-as %t/input2.ll -o %t/input2.bc
 ;
 ; Test the dry run of a simple case to link two input files.
-; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t/input1.bc 
%t/input2.bc -o %t/spirv.out 2>&1 \
+; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none 
%t/input1.bc %t/input2.bc -o %t/spirv.out 2>&1 \
 ; RUN:   | FileCheck %s --check-prefix=SIMPLE-FO
 ; SIMPLE-FO:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc  libfiles:  
output: [[LLVMLINKOUT:.*]].bc
-; SIMPLE-FO-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
 ; SIMPLE-FO-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: {{.*}}_0.spv
+; SIMPLE-FO-NOT:  {{.+}}
 ;
 ; Test that IMG_SPIRV image kind is set for non-AOT compilation.
 ; RUN: llvm-objdump --offloading %t/spirv.out | FileCheck %s 
--check-prefix=IMAGE-KIND-SPIRV
@@ -21,10 +21,9 @@
 ; RUN: mkdir -p %t/libs
 ; RUN: touch %t/libs/lib1.bc
 ; RUN: touch %t/libs/lib2.bc
-; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 %t/input1.bc 
%t/input2.bc --library-path=%t/libs --device-libs=lib1.bc,lib2.bc -o a.spv 2>&1 
\
+; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none 
%t/input1.bc %t/input2.bc --library-path=%t/libs --device-libs=lib1.bc,lib2.bc 
-o a.spv 2>&1 \
 ; RUN:   | FileCheck %s --check-prefix=DEVLIBS
 ; DEVLIBS:      sycl-device-link: inputs: {{.*}}.bc  libfiles: {{.*}}lib1.bc, 
{{.*}}lib2.bc  output: [[LLVMLINKOUT:.*]].bc
-; DEVLIBS-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
 ; DEVLIBS-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: a_0.spv
 ;
 ; Test a simple case with a random file (not bitcode) as input.
@@ -42,11 +41,10 @@
 ; DEVLIBSERR2: '{{.*}}lib3.bc' SYCL device library file is not found
 ;
 ; Test AOT compilation for an Intel GPU.
-; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=bmg_g21 
%t/input1.bc %t/input2.bc -o %t/aot-gpu.out 2>&1 \
+; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none 
-arch=bmg_g21 %t/input1.bc %t/input2.bc -o %t/aot-gpu.out 2>&1 \
 ; RUN:     --ocloc-options="-a -b" \
 ; RUN:   | FileCheck %s --check-prefix=AOT-INTEL-GPU
 ; AOT-INTEL-GPU:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: 
output: [[LLVMLINKOUT:.*]].bc
-; AOT-INTEL-GPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
 ; AOT-INTEL-GPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
[[SPIRVTRANSLATIONOUT:.*]]_0.spv
 ; AOT-INTEL-GPU-NEXT: "{{.*}}ocloc{{.*}}" {{.*}}-device bmg_g21 -a -b 
{{.*}}-output [[SPIRVTRANSLATIONOUT]]_0.out -file [[SPIRVTRANSLATIONOUT]]_0.spv
 ;
@@ -55,11 +53,10 @@
 ; IMAGE-KIND-OBJECT: kind            elf
 ;
 ; Test AOT compilation for an Intel CPU.
-; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 -arch=graniterapids 
%t/input1.bc %t/input2.bc -o %t/aot-cpu.out 2>&1 \
+; RUN: clang-sycl-linker --dry-run -v -triple=spirv64 --module-split-mode=none 
-arch=graniterapids %t/input1.bc %t/input2.bc -o %t/aot-cpu.out 2>&1 \
 ; RUN:     --opencl-aot-options="-a -b" \
 ; RUN:   | FileCheck %s --check-prefix=AOT-INTEL-CPU
 ; AOT-INTEL-CPU:      sycl-device-link: inputs: {{.*}}.bc, {{.*}}.bc libfiles: 
output: [[LLVMLINKOUT:.*]].bc
-; AOT-INTEL-CPU-NEXT: sycl-module-split: input: [[LLVMLINKOUT]].bc, output: 
[[LLVMLINKOUT]].bc, mode: none
 ; AOT-INTEL-CPU-NEXT: LLVM backend: input: [[LLVMLINKOUT]].bc, output: 
[[SPIRVTRANSLATIONOUT:.*]]_0.spv
 ; AOT-INTEL-CPU-NEXT: "{{.*}}opencl-aot{{.*}}" {{.*}}--device=cpu -a -b 
{{.*}}-o [[SPIRVTRANSLATIONOUT]]_0.out [[SPIRVTRANSLATIONOUT]]_0.spv
 ;
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp 
b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
index 5a525d263427d..9c29f1b9f9fb4 100644
--- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
+++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp
@@ -468,20 +468,36 @@ static Error runAOTCompile(StringRef InputFile, StringRef 
OutputFile,
   return createStringError(inconvertibleErrorCode(), "Unsupported arch");
 }
 
+static constexpr char AttrSYCLModuleId[] = "sycl-module-id";
+
 /// SYCL device code module split mode.
 enum class IRSplitMode {
+  SPLIT_PER_TU,     // one module per translation unit
   SPLIT_PER_KERNEL, // one module per kernel
   SPLIT_NONE        // no splitting
 };
 
-/// Parses the value of \p -module-split-mode.
+/// Parses the value of \p --module-split-mode.
 static std::optional<IRSplitMode> convertStringToSplitMode(StringRef S) {
   return StringSwitch<std::optional<IRSplitMode>>(S)
+      .Case("source", IRSplitMode::SPLIT_PER_TU)
       .Case("kernel", IRSplitMode::SPLIT_PER_KERNEL)
       .Case("none", IRSplitMode::SPLIT_NONE)
       .Default(std::nullopt);
 }
 
+static StringRef splitModeToString(IRSplitMode Mode) {
+  switch (Mode) {
+  case IRSplitMode::SPLIT_PER_TU:
+    return "source";
+  case IRSplitMode::SPLIT_PER_KERNEL:
+    return "kernel";
+  case IRSplitMode::SPLIT_NONE:
+    return "none";
+  }
+  llvm_unreachable("bad split mode");
+}
+
 /// Result of splitting a device module: the bitcode file path and the
 /// serialized symbol table for each device image.
 struct SplitModule {
@@ -489,63 +505,81 @@ struct SplitModule {
   SmallString<0> Symbols;
 };
 
-static bool isEntryPoint(const Function &F) {
-  return !F.isDeclaration() && F.hasKernelCallingConv();
+static bool isEntryPoint(const Function &F, bool EmitOnlyKernelsAsEntryPoints) 
{
+  if (F.isDeclaration())
+    return false;
+  if (F.hasKernelCallingConv())
+    return true;
+  if (EmitOnlyKernelsAsEntryPoints)
+    return false;
+  // sycl_external functions carry the "sycl-module-id" attribute.
+  // This branch is not reachable while EmitOnlyKernelsAsEntryPoints is
+  // hardcoded to true (see TODO in runSYCLLink).
+  return F.hasFnAttribute(AttrSYCLModuleId);
 }
 
-/// Collect kernel names from \p M and serialize them into a symbol table.
-static SmallString<0> collectSymbols(const Module &M) {
-  SmallVector<StringRef> KernelNames;
+/// Collect entry point names from \p M and serialize them into a symbol table.
+static SmallString<0> collectEntryPoints(const Module &M,
+                                         bool EmitOnlyKernelsAsEntryPoints) {
+  SmallVector<StringRef> Names;
   for (const Function &F : M)
-    if (isEntryPoint(F))
-      KernelNames.push_back(F.getName());
+    if (isEntryPoint(F, EmitOnlyKernelsAsEntryPoints))
+      Names.push_back(F.getName());
   SmallString<0> SymbolData;
-  llvm::offloading::sycl::writeSymbolTable(KernelNames, SymbolData);
+  llvm::offloading::sycl::writeSymbolTable(Names, SymbolData);
   return SymbolData;
 }
 
-/// Splits the fully linked device \p M into one bitcode file per device image
-/// according to \p Mode and returns the list of split images with their symbol
-/// tables.
-///
-/// For SPLIT_NONE, \p LinkedBitcodeFile is returned as-is.
-/// For SPLIT_PER_KERNEL, the module is split into parts such that each part
-/// contains exactly one kernel entry point and its transitive dependencies;
-/// each part is written to a fresh temporary bitcode file.
-static Expected<SmallVector<SplitModule, 0>>
-splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile,
-                IRSplitMode Mode, const ArgList &Args) {
-  SmallVector<SplitModule, 0> SplitModules;
+/// Functor passed to splitModuleTransitiveFromEntryPoints. For each input \p 
F,
+/// returns a numeric group ID (if \p F is an entry point) determining which
+/// device image it lands in, or std::nullopt (for non-entry-points).
+/// SPLIT_PER_KERNEL \p Mode gives each kernel its own ID;
+/// SPLIT_PER_TU \p Mode groups kernels by their "sycl-module-id" attribute
+/// value.
+class EntryPointCategorizer {
+public:
+  EntryPointCategorizer(IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints)
+      : Mode(Mode), OnlyKernelsAreEntryPoints(EmitOnlyKernelsAsEntryPoints) {}
 
-  if (Mode == IRSplitMode::SPLIT_NONE) {
-    SplitModules.push_back(
-        {SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)});
-    return SplitModules;
-  }
+  std::optional<int> operator()(const Function &F) {
+    if (!isEntryPoint(F, OnlyKernelsAreEntryPoints))
+      return std::nullopt;
 
-  assert(Mode == IRSplitMode::SPLIT_PER_KERNEL);
+    std::string Key;
+    switch (Mode) {
+    case IRSplitMode::SPLIT_PER_KERNEL:
+      Key = F.getName().str();
+      break;
+    case IRSplitMode::SPLIT_PER_TU:
+      Key = F.getFnAttribute(AttrSYCLModuleId).getValueAsString().str();
+      break;
+    case IRSplitMode::SPLIT_NONE:
+      llvm_unreachable("categorizer cannot be used for SPLIT_NONE");
+    }
 
-  // splitModuleTransitiveFromEntryPoints asserts that at least one entry point
-  // was categorized. If the linked module contains no kernel definitions at
-  // all, there is nothing to split; fall back to shipping the linked module
-  // as a single image.
-  bool HasKernel = llvm::any_of(M->functions(), isEntryPoint);
-  if (!HasKernel) {
-    SplitModules.push_back(
-        {SmallString<256>(LinkedBitcodeFile), collectSymbols(*M)});
-    return SplitModules;
+    auto [It, Inserted] =
+        StrToId.try_emplace(std::move(Key), static_cast<int>(StrToId.size()));
+    return It->second;
   }
 
-  // Categorize each kernel function into its own group. Non-kernels and
-  // declarations return std::nullopt so they are pulled into whichever split
-  // transitively needs them.
-  int NextCategory = 0;
-  auto EntryPointCategorizer =
-      [&NextCategory](const Function &F) -> std::optional<int> {
-    if (!isEntryPoint(F))
-      return std::nullopt;
-    return NextCategory++;
-  };
+private:
+  IRSplitMode Mode;
+  bool OnlyKernelsAreEntryPoints;
+  llvm::StringMap<int> StrToId;
+};
+
+/// Splits the fully linked device \p M into one bitcode file per device image
+/// according to \p Mode and returns the list of split images with their symbol
+/// tables. The module is split transitively from entry points; each part is
+/// written to a fresh temporary bitcode file.
+static Expected<SmallVector<SplitModule, 0>>
+splitDeviceCode(std::unique_ptr<Module> M, StringRef LinkedBitcodeFile,
+                IRSplitMode Mode, bool EmitOnlyKernelsAsEntryPoints,
+                const ArgList &Args) {
+  assert(Mode != IRSplitMode::SPLIT_NONE && "SPLIT_NONE is unsupported");
+
+  SmallVector<SplitModule, 0> SplitModules;
+  EntryPointCategorizer Categorizer(Mode, EmitOnlyKernelsAsEntryPoints);
 
   auto SplitCallback = [&](std::unique_ptr<Module> Part) -> Error {
     Expected<StringRef> BitcodeFileOrErr =
@@ -560,17 +594,40 @@ splitDeviceCode(std::unique_ptr<Module> M, StringRef 
LinkedBitcodeFile,
     WriteBitcodeToFile(*Part, OS);
 
     SplitModules.push_back(
-        {SmallString<256>(*BitcodeFileOrErr), collectSymbols(*Part)});
+        {SmallString<256>(*BitcodeFileOrErr),
+         collectEntryPoints(*Part, EmitOnlyKernelsAsEntryPoints)});
     return Error::success();
   };
 
   if (Error Err = splitModuleTransitiveFromEntryPoints(
-          std::move(M), EntryPointCategorizer, SplitCallback))
+          std::move(M), Categorizer, SplitCallback))
     return Err;
 
+  if (Verbose || DryRun) {
+    errs() << formatv("sycl-module-split: input: {0}, mode: {1}\n",
+                      LinkedBitcodeFile, splitModeToString(Mode));
+    for (const SplitModule &SI : SplitModules) {
+      errs() << formatv("{0} [", SI.ModuleFilePath);
+      llvm::offloading::sycl::forEachSymbol(
+          SI.Symbols, [](StringRef Name) { errs() << Name << " "; });
+      errs() << "]\n";
+    }
+  }
+
   return SplitModules;
 }
 
+/// Returns true if module splitting can be skipped: either \p Mode is
+/// SPLIT_NONE, or \p M contains no entry points (nothing to split from).
+static bool checkModuleSplitCanBeSkipped(IRSplitMode Mode, const Module &M,
+                                         bool EmitOnlyKernelsAsEntryPoints) {
+  if (Mode == IRSplitMode::SPLIT_NONE)
+    return true;
+  return llvm::none_of(M.functions(), [&](const Function &F) {
+    return isEntryPoint(F, EmitOnlyKernelsAsEntryPoints);
+  });
+}
+
 /// Performs the following steps:
 /// 1. Link input device code (user code and SYCL device library code).
 /// 2. Run SPIR-V code generation.
@@ -586,7 +643,7 @@ Error runSYCLLink(ArrayRef<std::string> Files, const 
ArgList &Args) {
   auto &[LinkedModule, LinkedFile] = *LinkedOrErr;
 
   // Determine the requested module split mode.
-  IRSplitMode SplitMode = IRSplitMode::SPLIT_NONE;
+  IRSplitMode SplitMode = IRSplitMode::SPLIT_PER_TU;
   if (Arg *A = Args.getLastArg(OPT_module_split_mode_EQ)) {
     std::optional<IRSplitMode> ModeOrNone =
         convertStringToSplitMode(A->getValue());
@@ -596,20 +653,25 @@ Error runSYCLLink(ArrayRef<std::string> Files, const 
ArgList &Args) {
     SplitMode = *ModeOrNone;
   }
 
-  // Split the linked module into one or more device images.
-  Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr =
-      splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode, Args);
-  if (!SplitModulesOrErr)
-    return SplitModulesOrErr.takeError();
-  SmallVector<SplitModule, 0> &SplitModules = *SplitModulesOrErr;
-  if (Verbose) {
-    SmallVector<StringRef> SplitFiles;
-    for (const SplitModule &SI : SplitModules)
-      SplitFiles.push_back(SI.ModuleFilePath);
-    errs() << formatv("sycl-module-split: input: {0}, output: {1}, mode: 
{2}\n",
-                      LinkedFile, llvm::join(SplitFiles, ", "),
-                      SplitMode == IRSplitMode::SPLIT_PER_KERNEL ? "kernel"
-                                                                 : "none");
+  // TODO: Expose this as a command-line option and default it to false when
+  // device-image dynamic linking is supported, so that sycl_external functions
+  // can be called across device image boundaries.
+  bool EmitOnlyKernelsAsEntryPoints = true;
+
+  SmallVector<SplitModule, 0> SplitModules;
+  if (checkModuleSplitCanBeSkipped(SplitMode, *LinkedModule,
+                                   EmitOnlyKernelsAsEntryPoints)) {
+    SplitModules.push_back(
+        {SmallString<256>(LinkedFile),
+         collectEntryPoints(*LinkedModule, EmitOnlyKernelsAsEntryPoints)});
+  } else {
+    Expected<SmallVector<SplitModule, 0>> SplitModulesOrErr =
+        splitDeviceCode(std::move(LinkedModule), LinkedFile, SplitMode,
+                        EmitOnlyKernelsAsEntryPoints, Args);
+    if (!SplitModulesOrErr)
+      return SplitModulesOrErr.takeError();
+
+    SplitModules = std::move(*SplitModulesOrErr);
   }
 
   bool IsAOTCompileNeeded = IsIntelOffloadArch(
diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td 
b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
index 740c5a4783ac9..171915c29bd93 100644
--- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
+++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td
@@ -57,6 +57,8 @@ def opencl_aot_options_EQ : Joined<["--", "-"], 
"opencl-aot-options=">,
 
 def module_split_mode_EQ : Joined<["--", "-"], "module-split-mode=">,
   Flags<[LinkerOnlyOption]>, MetaVarName<"<mode>">,
-  HelpText<"SYCL device code module split mode. Valid values: 'none' (default) 
"
-           "emits a single device image; 'kernel' emits one device image per "
-           "kernel function.">;
+  HelpText<"SYCL device code module split mode. Valid values:\n"
+           "- 'source' (default) emits one device image per translation unit 
that contains "
+           "at least one entry point;\n"
+           "- 'kernel' emits one device image per kernel function;\n"
+           "- 'none' emits a single device image.">;
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp 
b/llvm/lib/Frontend/Offloading/Utility.cpp
index d689d1bb192d6..909af0237910f 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -462,8 +462,12 @@ void sycl::writeSymbolTable(ArrayRef<StringRef> Names, 
SmallString<0> &Out) {
   uint32_t StringDataOffset =
       sizeof(SymbolTableHeader) + Count * sizeof(SymbolTableEntry);
 
-  // Pre-size the output to hold the header and entry array; string data is
-  // appended below.
+  // Compute total size and reserve to prevent reallocation while writing
+  // entries via pointer (append() could otherwise invalidate the pointer).
+  uint32_t TotalSize = StringDataOffset;
+  for (StringRef N : Names)
+    TotalSize += N.size() + 1;
+  Out.reserve(TotalSize);
   Out.resize(StringDataOffset);
 
   // Write the header.

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to