https://github.com/mmjjpp updated 
https://github.com/llvm/llvm-project/pull/198702

>From d9fc0bd6f1d5767f87eb515a665e76f6d00d1ea4 Mon Sep 17 00:00:00 2001
From: maojiaping <[email protected]>
Date: Wed, 20 May 2026 11:22:30 +0800
Subject: [PATCH 1/6] [ThinLTO][Split] Split module for parallel compilation in
 backend

An interface for splitting a module by callgraph is added. This
interface is called in the thinlto backend phase. The module is
split into N Mparts, and opt and codegen are performed on the
Mparts in parallel to implement parallel compilation in the
thinlto backend.
---
 .../llvm/Transforms/Utils/SplitModuleCG.h     |  34 ++
 llvm/lib/LTO/LTOBackend.cpp                   | 292 +++++++++++++++++-
 llvm/lib/Transforms/Utils/CMakeLists.txt      |   1 +
 llvm/lib/Transforms/Utils/SplitModuleCG.cpp   |  26 ++
 4 files changed, 336 insertions(+), 17 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
 create mode 100644 llvm/lib/Transforms/Utils/SplitModuleCG.cpp

diff --git a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h 
b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
new file mode 100644
index 0000000000000..e60c4e931d40c
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
@@ -0,0 +1,34 @@
+#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
+#define LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/LTO/Config.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+
+namespace llvm {
+/// Splits the module M into N linkable partitions. The function ModuleCallback
+/// is called N times passing each individual partition as the MPart argument.
+class SplitModuleCG {
+public:
+  using ModuleCreationCallback =
+      function_ref<void(std::unique_ptr<Module> MPart, unsigned PartitionId)>;
+  SplitModuleCG(Module &M,
+                const ModuleSummaryIndex &CombinedIndex,
+                unsigned LimitPartition = 0);
+  void SplitModule(ModuleCreationCallback ModuleCallback,
+                   const llvm::lto::Config &C);
+
+  unsigned getPartitionNum() { return N; }
+
+  private:
+  unsigned N;
+  Module &M;
+  CallGraph CG;
+  DenseSet<const Function *> EntryFuncs;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 73697a9d0d446..11200ade0e8c0 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -34,8 +34,10 @@
 #include "llvm/Plugins/PassPlugin.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/VirtualFileSystem.h"
@@ -45,6 +47,8 @@
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/Transforms/Utils/SplitModuleCG.h"
+#include <filesystem>
 #include <optional>
 
 using namespace llvm;
@@ -80,6 +84,23 @@ static cl::list<std::string>
                              "path matches this for -save-temps options"),
                     cl::CommaSeparated, cl::Hidden);
 
+// Modules with fewer IR instructions than this value are never split (see
+// canDoSplitModule).
+static cl::opt<unsigned> ThinLTOSplitModuleSizeThreshold(
+    "thinlto-split-module-size-threshold", cl::Hidden, cl::init(500),
+    cl::desc("Minimum module size (in IR instructions) required to split a "
+             "module in the thinlto backend."));
+
+// NOTE(review): this option is not read by any code visible in this patch;
+// confirm that a later patch in the series consumes it.
+static cl::opt<float> ThinLTOSplitModuleSizeRateThreshold(
+    "thinlto-split-module-size-rate-threshold", cl::Hidden, cl::init(0.5),
+    cl::desc("Whether to split in thinlto backend based on the ratio of "
+             "(callgraph size)/(module size)"));
+
+// Partition limit that thinBackend forwards to SplitModuleCG. The default of
+// 0 lets SplitModuleCG pick the partition count itself.
+static cl::opt<unsigned> ThinLTOSplitPartitions(
+    "thinlto-split-partitions", cl::Hidden, cl::init(0),
+    cl::desc("Control split to how many partitions in thinlto backend."));
+
+// Master switch: thinBackend only considers splitting a module when this is
+// set. It also changes the -save-temps file naming in Config::addSaveTemps.
+static cl::opt<bool> ThinLTOSplit("thinlto-split", cl::init(false),
+                          cl::desc("Enable split module in thinlto backend."));
+
 namespace llvm {
 extern cl::opt<bool> NoPGOWarnMismatch;
 }
@@ -124,12 +145,19 @@ Error Config::addSaveTemps(std::string OutputFileName, 
bool UseInputModulePath,
       if (LinkerHook && !LinkerHook(Task, M))
         return false;
 
+      auto extract_filename = [](const std::string &path) -> std::string {
+        std::filesystem::path fs_path(path);
+        return fs_path.filename().string();
+      };
+
       std::string PathPrefix;
       // If this is the combined module (not a ThinLTO backend compile) or the
       // user hasn't requested using the input module's path, emit to a file
       // named from the provided OutputFileName with the Task ID appended.
       if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
         PathPrefix = OutputFileName;
+        if (ThinLTOSplit)
+          PathPrefix += extract_filename(M.getSourceFileName()) + ".";
         if (Task != (unsigned)-1)
           PathPrefix += utostr(Task) + ".";
       } else
@@ -513,6 +541,212 @@ static void codegen(const Config &Conf, TargetMachine *TM,
     report_fatal_error(std::move(Err));
 }
 
+/// Returns the number of IR instructions in \p F, summed over all of its
+/// basic blocks.
+static unsigned calFunctionSize(const llvm::Function &F) {
+  unsigned NumInsts = 0;
+  for (const llvm::BasicBlock &Block : F)
+    NumInsts += Block.size();
+  return NumInsts;
+}
+
+/// Returns the combined calFunctionSize() of every function in \p M.
+static unsigned calModuleSize(const llvm::Module &M) {
+  unsigned TotalInsts = 0;
+  for (const llvm::Function &Fn : M)
+    TotalInsts += calFunctionSize(Fn);
+  return TotalInsts;
+}
+
+/// Returns true when \p M contains at least
+/// -thinlto-split-module-size-threshold IR instructions, i.e. it is large
+/// enough to be considered for splitting.
+static bool canDoSplitModule(const llvm::Module &M) {
+  return calModuleSize(M) >= ThinLTOSplitModuleSizeThreshold;
+}
+
+/// Placeholder profitability check. It currently reports every module as
+/// worth splitting; \p Mod and \p CombinedIndex are not inspected yet.
+static bool HasLargeCG(Module &Mod, const ModuleSummaryIndex &CombinedIndex) {
+  // TODO: Check whether the module contains large callgraphs. ThinLTO
+  // parallel compilation is expected to pay off when multiple callgraphs can
+  // be split apart.
+  return true;
+}
+
+/// Hands out task ids for split partitions from a range that is kept apart
+/// from the ids of ordinary ThinLTO backend tasks.
+struct TaskIdAllocator {
+  using TaskId = unsigned;
+
+  /// Namespace marker: the most significant bit of TaskId. Ordinary ThinLTO
+  /// backend tasks are expected to leave this bit clear, while every id
+  /// produced by alloc() has it set, so the two id spaces cannot overlap.
+  static constexpr TaskId tag() {
+    return TaskId{1} << (std::numeric_limits<TaskId>::digits - 1);
+  }
+
+  /// Next partition sequence number; it must stay below tag().
+  std::atomic<TaskId> seq{0};
+
+  /// Returns a fresh partition id of the form `tag() | sequence-number`.
+  /// Reports a fatal error once the sequence number itself reaches the tag
+  /// bit, because the namespaces would overlap from then on.
+  TaskId alloc() {
+    const TaskId Next = seq.fetch_add(1, std::memory_order_relaxed);
+    const bool ReachedTagBit = (Next & tag()) != 0;
+    if (ReachedTagBit)
+      report_fatal_error(
+          "Partition TaskId overflow: seq reached the tag bit.");
+    return Next | tag();
+  }
+
+  /// Returns true if \p id carries the partition tag bit.
+  static bool isPartition(TaskId id) { return (id & tag()) != 0; }
+};
+
+// Global allocator shared by all split partitions.
+static TaskIdAllocator gSplitTaskIds;
+
+/// Splits \p Mod into partitions, optimizes (if \p DoOpt) and code-generates
+/// each partition into its own temporary object file, merges those objects
+/// with `<linker> -r`, and writes the merged object to the stream obtained
+/// from \p AddStream for \p task.
+///
+/// \param task Task id of the module being split; used for the final output
+///        stream. opt() and codegen() of each partition use a separate id
+///        allocated from gSplitTaskIds.
+/// \param ParallelCodeGenParallelismLevel Requested partition limit handed to
+///        SplitModuleCG; overwritten with the partition count it reports.
+/// \param DoOpt Run opt() on every partition before codegen.
+/// \param IRAddStream If set and \p DoOpt is true, each optimized partition
+///        is saved through cgdata::saveModuleForTwoRounds.
+/// \returns true. Every failure is reported through report_fatal_error.
+static bool splitOptAndCodeGenThin(unsigned task, const Config &C,
+                                   TargetMachine *TM, AddStreamFn AddStream,
+                                   unsigned ParallelCodeGenParallelismLevel,
+                                   Module &Mod,
+                                   const ModuleSummaryIndex &CombinedIndex,
+                                   const std::vector<uint8_t> &CmdArgs,
+                                   bool DoOpt, AddStreamFn IRAddStream,
+                                   ArrayRef<StringRef> &BitcodeLibFuncs) {
+  unsigned ThreadCount = 0;
+  const Target *T = &TM->getTarget();
+
+  // Guards ThreadCount and the task-id allocation in the partition callback.
+  static std::mutex PrintMutex;
+
+  SplitModuleCG SplitModuleCG(Mod, CombinedIndex,
+                              ParallelCodeGenParallelismLevel);
+  ParallelCodeGenParallelismLevel = SplitModuleCG.getPartitionNum();
+
+  // One slot per partition, filled in when that partition's codegen asks for
+  // its output stream.
+  std::vector<std::string> TempObjectFiles(ParallelCodeGenParallelismLevel);
+  std::vector<llvm::FileRemover> TempFileRemovers(
+      ParallelCodeGenParallelismLevel);
+
+  const auto HandleModulePartition = [&](std::unique_ptr<Module> MPart,
+                                         unsigned PartitionId) {
+    unsigned CurrentThreadId, UniqueTaskId;
+    {
+      std::lock_guard<std::mutex> Lock(PrintMutex);
+      CurrentThreadId = ThreadCount++;
+
+      // In distributed ThinLTO, `task` may be a sentinel (e.g. -1 cast to
+      // unsigned), which becomes UINT_MAX and naturally has MSB==1. Treat it
+      // as "no base task id" and don't enforce the namespace check on it.
+      //
+      // We do not rely on the incoming `task` for partition uniqueness: split
+      // partitions get a dedicated UniqueTaskId allocated below.
+      if (task != std::numeric_limits<unsigned>::max()) {
+        assert(!TaskIdAllocator::isPartition(task) &&
+               "Original ThinLTO TaskId unexpectedly overlaps the partition "
+               "namespace");
+      }
+      UniqueTaskId = gSplitTaskIds.alloc();
+    }
+
+    std::unique_ptr<TargetMachine> ThreadTM =
+        createTargetMachine(C, T, *MPart);
+
+    if (DoOpt) {
+      if (!opt(C, ThreadTM.get(), UniqueTaskId, *MPart, /*IsThinLTO=*/true,
+               /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
+               CmdArgs, BitcodeLibFuncs)) {
+        report_fatal_error("Failed to gen opt for split mod in thread.");
+      }
+
+      // Save the current module before the first codegen round.
+      // Note that the second codegen round runs only `codegen()` without
+      // running `opt()`. We're not reaching here as it's bailed out earlier
+      // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
+      //
+      // NOTE(review): the id used here is `task + CurrentThreadId`, not
+      // UniqueTaskId. That sum wraps when `task` is the UINT_MAX sentinel and
+      // looks like it can equal the task id of a different module; confirm
+      // that this is intended.
+      if (IRAddStream)
+        cgdata::saveModuleForTwoRounds(*MPart, task + CurrentThreadId,
+                                       IRAddStream);
+    }
+
+    auto splitStream = [&](unsigned task, const Twine &moduleName)
+        -> Expected<std::unique_ptr<CachedFileStream>> {
+      int FD;
+      SmallString<128> TempFilename;
+      if (std::error_code EC = sys::fs::createTemporaryFile(
+              "thinlto-split", "o", FD, TempFilename))
+        return errorCodeToError(EC);
+
+      TempObjectFiles[PartitionId] = std::string(TempFilename.str());
+      TempFileRemovers[PartitionId].setFile(TempObjectFiles[PartitionId]);
+
+      // NOTE(review): the third raw_fd_ostream argument is `unbuffered`, not
+      // a close-on-destruct flag, so this stream is unbuffered. The value is
+      // kept as submitted; confirm whether buffered output was intended.
+      auto OS = std::make_unique<raw_fd_ostream>(
+          FD, /*shouldClose=*/true, /*unbuffered=*/true);
+
+      auto Stream = std::make_unique<CachedFileStream>(
+          std::move(OS), std::string(TempFilename.str()));
+
+      return std::move(Stream);
+    };
+
+    codegen(C, ThreadTM.get(), splitStream, UniqueTaskId, *MPart,
+            CombinedIndex);
+  };
+
+  SplitModuleCG.SplitModule(HandleModulePartition, C);
+
+  // TempObjectFiles is pre-sized, so the container is never empty. Test the
+  // entries instead: a slot stays empty when its partition never opened an
+  // output stream, and such a slot must not reach the linker as an empty
+  // argument.
+  std::vector<StringRef> PartitionObjects;
+  for (const std::string &File : TempObjectFiles)
+    if (!File.empty())
+      PartitionObjects.push_back(File);
+  if (PartitionObjects.empty()) {
+    llvm::errs() << "TempObjectFiles.empty()\n";
+    return true;
+  }
+
+  auto FinalStream = AddStream(task, Mod.getModuleIdentifier());
+  if (!FinalStream)
+    report_fatal_error(Twine("Failed to open final output stream: ") +
+                       toString(FinalStream.takeError()));
+
+  // Reserve a name for the merged object. The path-only overload creates the
+  // file without handing back a descriptor, so there is nothing to close
+  // here.
+  SmallString<128> MergedFilename;
+  if (sys::fs::createTemporaryFile("thinlto-merged", "o", MergedFilename))
+    report_fatal_error("Failed to create merged temp file.");
+  llvm::FileRemover MergedFileRemover(MergedFilename);
+
+  // Use ld.lld (or ld as a fallback) to combine the partitions into one
+  // relocatable object.
+  std::vector<StringRef> Args;
+  std::string LinkerPath = "";
+  if (auto Path = sys::findProgramByName("ld.lld"))
+    LinkerPath = *Path;
+  else if (auto Path = sys::findProgramByName("ld"))
+    LinkerPath = *Path;
+
+  if (LinkerPath.empty())
+    report_fatal_error(
+        "Cannot find linker (ld or ld.lld) to merge partitions.");
+
+  Args.push_back(LinkerPath);
+  Args.push_back("-r");
+  Args.push_back("-o");
+  Args.push_back(MergedFilename);
+
+  for (StringRef File : PartitionObjects)
+    Args.push_back(File);
+
+  std::string ErrMsg;
+  int Result = sys::ExecuteAndWait(LinkerPath, Args, /*Env=*/std::nullopt,
+                                   /*Redirects=*/{}, /*SecondsToWait=*/0,
+                                   /*MemoryLimit=*/0, &ErrMsg);
+
+  if (Result != 0) {
+    errs() << "Linker failed: " << ErrMsg << "\n";
+    report_fatal_error("Failed to merge split objects.");
+  }
+
+  // Copy the merged object into the caller-provided output stream.
+  {
+    std::unique_ptr<CachedFileStream> &FinalFileStream = *FinalStream;
+    auto BufferOrErr = MemoryBuffer::getFile(MergedFilename);
+    if (!BufferOrErr)
+      report_fatal_error("Failed to read merged object.");
+
+    FinalFileStream->OS->write(BufferOrErr.get()->getBufferStart(),
+                               BufferOrErr.get()->getBufferSize());
+    if (Error Err = FinalFileStream->commit()) {
+      report_fatal_error(Twine("Failed to commit final file stream: ") +
+                         toString(std::move(Err)));
+    }
+  }
+  return true;
+}
+
 static void splitCodeGen(const Config &C, TargetMachine *TM,
                          AddStreamFn AddStream,
                          unsigned ParallelCodeGenParallelismLevel, Module &Mod,
@@ -677,11 +911,28 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, 
AddStreamFn AddStream,
   // the module, if applicable.
   Mod.setPartialSampleProfileRatio(CombinedIndex);
 
+  bool ProfitableToSplit = true;
+  if (ThinLTOSplit) {
+    if (!canDoSplitModule(Mod) || !HasLargeCG(Mod, CombinedIndex)) {
+      ProfitableToSplit = false;
+      LLVM_DEBUG(dbgs() << "warning: thinlto split not enable for module: "
+                        << Mod.getName());
+    } else {
+      LLVM_DEBUG(dbgs() << "thinlto: split codegen for module: "
+                        << Mod.getName());
+    }
+  }
+
   LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
   if (CodeGenOnly) {
-    // If CodeGenOnly is set, we only perform code generation and skip
-    // optimization. This value may differ from Conf.CodeGenOnly.
-    codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
+    if (ThinLTOSplit && ProfitableToSplit)
+      splitOptAndCodeGenThin(Task, Conf, TM.get(), AddStream,
+                             ThinLTOSplitPartitions, Mod, CombinedIndex,
+                             CmdArgs, false, IRAddStream, BitcodeLibFuncs);
+    else
+      // If CodeGenOnly is set, we only perform code generation and skip
+      // optimization. This value may differ from Conf.CodeGenOnly.
+      codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
   }
 
@@ -691,20 +942,27 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, 
AddStreamFn AddStream,
   auto OptimizeAndCodegen =
       [&](Module &Mod, TargetMachine *TM,
           LLVMRemarkFileHandle DiagnosticOutputFile) {
-        // Perform optimization and code generation for ThinLTO.
-        if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
-                 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
-                 CmdArgs, BitcodeLibFuncs))
-          return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
-
-        // Save the current module before the first codegen round.
-        // Note that the second codegen round runs only `codegen()` without
-        // running `opt()`. We're not reaching here as it's bailed out earlier
-        // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
-        if (IRAddStream)
-          cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
-
-        codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
+        if (ThinLTOSplit && ProfitableToSplit) {
+          if (!splitOptAndCodeGenThin(
+                  Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod,
+                  CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs))
+            return 
finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+        } else {
+          // Perform optimization and code generation for ThinLTO.
+          if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
+                  /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
+                  CmdArgs, BitcodeLibFuncs))
+            return 
finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
+
+          // Save the current module before the first codegen round.
+          // Note that the second codegen round runs only `codegen()` without
+          // running `opt()`. We're not reaching here as it's bailed out 
earlier
+          // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
+          if (IRAddStream)
+            cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream);
+
+          codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex);
+        }
         return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
       };
 
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt 
b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 8fe0476ab1a32..01b44ae2cfa29 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -89,6 +89,7 @@ add_llvm_component_library(LLVMTransformUtils
   SizeOpts.cpp
   SplitModule.cpp
   SplitModuleByCategory.cpp
+  SplitModuleCG.cpp
   StripNonLineTableDebugInfo.cpp
   SymbolRewriter.cpp
   UnifyFunctionExitNodes.cpp
diff --git a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp 
b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
new file mode 100644
index 0000000000000..9f57cb3ed566e
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
@@ -0,0 +1,26 @@
+#include "llvm/Transforms/Utils/SplitModuleCG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "split-module-CG"
+
+void SplitModuleCG::SplitModule(ModuleCreationCallback ModuleCallback,
+                                const llvm::lto::Config &C) {
+  // TODO: 1. Process the linkage of the GlobalValue; 2. Allocate the callgraph
+  // to N partitions; 3.Invoke the cloneModule API to copy the N partitions to
+  // obtain MParts.
+
+}
+
+SplitModuleCG::SplitModuleCG(Module &M,
+                             const ModuleSummaryIndex &CombinedIndex,
+                             unsigned LimitPartition)
+    : M(M), CG(M), N(LimitPartition) {
+  // TODO: The module is split based on the callgraph, and EntryFuncs stores
+  // the root function of each callgraph.
+
+  if (N == 0 || N > EntryFuncs.size()) {
+    N = EntryFuncs.size();
+  }
+  N = N == 0 ? 1 : N;
+}

>From b80904856475a12f4c6a010c1730324406d7b595 Mon Sep 17 00:00:00 2001
From: maojiaping <[email protected]>
Date: Wed, 20 May 2026 15:27:29 +0800
Subject: [PATCH 2/6] [ThinLTO][Split] Add callgraph-based module
 splitting (SplitModuleCG)

Add a new SplitModuleCG utility that partitions a module into multiple
parts using function callgraph traversal and cost-based load balancing.
This is intended for use in ThinLTO to parallelize code generation by
splitting the module while preserving function call dependencies.

Key features:
- Build a simplified callgraph to track function calls and roots
- Calculate function costs based on IR instruction count
- Partition functions with balanced cost distribution
- Externalize local symbols and rename promoted symbols to avoid
  conflicts
- Clone module partitions and emit them in parallel
---
 .../llvm/Transforms/Utils/SplitModuleCG.h     | 182 ++++++++-
 llvm/lib/LTO/LTOBackend.cpp                   |  10 +
 llvm/lib/Transforms/Utils/SplitModuleCG.cpp   | 367 +++++++++++++++++-
 3 files changed, 552 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h 
b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
index e60c4e931d40c..956a1ea8030fe 100644
--- a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
+++ b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
@@ -1,6 +1,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
 #define LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H
 
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/LTO/Config.h"
@@ -8,6 +9,169 @@
 #include "llvm/ADT/DenseSet.h"
 
 namespace llvm {
+
+class SimplifyCallGraph;
+class SimplifyCallGraphNode;
+
+using CostType = InstructionCost::CostType;
+
+/// Call graph keyed by function that owns one SimplifyCallGraphNode per
+/// entry. The constructor stores references to the CallGraph and the Module
+/// and then calls createSimplifyCallGraph() (defined out of line) with the
+/// combined summary index.
+class SimplifyCallGraph {
+  using FunctionMapTy =
+      std::map<const Function *, std::unique_ptr<SimplifyCallGraphNode>>;
+
+  /// A map from \c Function* to \c SimplifyCallGraphNode*.
+  FunctionMapTy FunctionMap;
+
+  /// Shared lookup behind operator[] and at(). Asserts that \p F has a node;
+  /// without assertions a missing function is not diagnosed.
+  SimplifyCallGraphNode *lookup(const Function *F) const {
+    FunctionMapTy::const_iterator I = FunctionMap.find(F);
+    assert(I != FunctionMap.end() && "Function not in callgraph!");
+    return I->second.get();
+  }
+
+public:
+  explicit SimplifyCallGraph(CallGraph &CG,
+                             const ModuleSummaryIndex &CombinedIndex,
+                             Module &M)
+      : CG(CG), M(M) {
+    createSimplifyCallGraph(CombinedIndex);
+  }
+  ~SimplifyCallGraph() {}
+
+  using iterator = FunctionMapTy::iterator;
+  using const_iterator = FunctionMapTy::const_iterator;
+
+  /// Iteration over the (function, node) entries of the graph.
+  inline iterator begin() { return FunctionMap.begin(); }
+  inline iterator end() { return FunctionMap.end(); }
+  inline const_iterator begin() const { return FunctionMap.begin(); }
+  inline const_iterator end() const { return FunctionMap.end(); }
+
+  /// Returns the call graph node for the provided function, which must
+  /// already be part of the graph.
+  inline const SimplifyCallGraphNode *operator[](const Function *F) const {
+    return lookup(F);
+  }
+
+  /// Returns the call graph node for the provided function, which must
+  /// already be part of the graph.
+  inline SimplifyCallGraphNode *operator[](const Function *F) {
+    return lookup(F);
+  }
+
+  /// Named equivalent of operator[].
+  inline const SimplifyCallGraphNode *at(const Function *F) const {
+    return lookup(F);
+  }
+
+  /// Named equivalent of operator[].
+  inline SimplifyCallGraphNode *at(const Function *F) { return lookup(F); }
+
+  void createSimplifyCallGraph(const ModuleSummaryIndex &CombinedIndex);
+  void print();
+  SimplifyCallGraphNode *getOrInsertFunction(const Function *F);
+
+private:
+  CallGraph &CG;
+  Module &M;
+};
+
+/// One function of a SimplifyCallGraph together with the set of nodes it
+/// calls. NumReferences is maintained by addCalledFunction() and
+/// removeCalledFunction() of the nodes that list this node as a callee.
+class SimplifyCallGraphNode {
+public:
+  using CalledFunctionsSet = DenseSet<SimplifyCallGraphNode *>;
+  using iterator = CalledFunctionsSet::iterator;
+  using const_iterator = CalledFunctionsSet::const_iterator;
+
+  inline SimplifyCallGraphNode(SimplifyCallGraph *SCG, Function *F)
+      : SCG(SCG), F(F) {}
+
+  // Nodes are not copyable.
+  SimplifyCallGraphNode(const SimplifyCallGraphNode &) = delete;
+  SimplifyCallGraphNode &operator=(const SimplifyCallGraphNode &) = delete;
+
+  ~SimplifyCallGraphNode() {}
+
+  /// The function this node stands for.
+  Function *getFunction() const { return F; }
+
+  /// Current value of the reference counter.
+  unsigned getNumReferences() const { return NumReferences; }
+
+  // Read access to the callee set.
+  inline iterator begin() { return CalledFunctions.begin(); }
+  inline iterator end() { return CalledFunctions.end(); }
+  inline const_iterator begin() const { return CalledFunctions.begin(); }
+  inline const_iterator end() const { return CalledFunctions.end(); }
+  inline size_t count(SimplifyCallGraphNode *SCGNode) {
+    return CalledFunctions.count(SCGNode);
+  }
+  inline bool empty() const { return CalledFunctions.empty(); }
+  inline unsigned size() const { return (unsigned)CalledFunctions.size(); }
+
+  /// Records \p Called as a callee. Its reference counter is bumped only if
+  /// the edge was not present before.
+  void addCalledFunction(SimplifyCallGraphNode *Called) {
+    const bool IsNewEdge = CalledFunctions.insert(Called).second;
+    if (IsNewEdge)
+      Called->AddRef();
+  }
+
+  /// Drops the edge to \p Called. Its reference counter is lowered only if
+  /// the edge actually existed.
+  void removeCalledFunction(SimplifyCallGraphNode *Called) {
+    if (CalledFunctions.erase(Called))
+      Called->DropRef();
+  }
+
+private:
+  friend class SimplifyCallGraph;
+
+  SimplifyCallGraph *SCG;
+  Function *F;
+
+  CalledFunctionsSet CalledFunctions;
+  unsigned NumReferences = 0;
+
+  void DropRef() { --NumReferences; }
+  void AddRef() { ++NumReferences; }
+};
+
+/// Collects into \p Fns every defined function reachable from \p F through
+/// the callee edges of \p SCG. \p F itself is never added, even when one of
+/// its callees calls back into it. Functions already present in \p Fns are
+/// treated as visited and are not traversed again.
+///
+/// Only the edges recorded in \p SCG are followed; nothing here inspects call
+/// instructions, so indirect calls are covered only as far as \p SCG models
+/// them.
+///
+/// Declared `inline` rather than `static` because the definition lives in a
+/// header: all translation units share one definition, and units that do not
+/// call it get no unused-function warning.
+inline void addAllDependencies(SimplifyCallGraph &SCG, const Function &F,
+                               DenseSet<const Function *> &Fns) {
+  assert(!F.isDeclaration());
+  SmallVector<const Function *> WorkList({&F});
+
+  while (!WorkList.empty()) {
+    const Function *CurFn = WorkList.pop_back_val();
+    assert(!CurFn->isDeclaration());
+
+    for (SimplifyCallGraphNode *CalleeNode : *SCG.at(CurFn)) {
+      const Function *Callee = CalleeNode->getFunction();
+      if (!Callee || Callee->isDeclaration() || Callee == &F)
+        continue;
+      // Descend only into callees that are seen for the first time.
+      if (Fns.insert(Callee).second)
+        WorkList.push_back(Callee);
+    }
+  }
+}
+
+/// A function \c F bundled with the defined functions it transitively calls
+/// (per addAllDependencies) and the summed cost of the whole group.
+struct FunctionWithDependencies {
+  /// Gathers the dependencies of \p F from \p SCG and accumulates their costs
+  /// from \p FnCosts into TotalCost.
+  FunctionWithDependencies(SimplifyCallGraph &SCG,
+                           const DenseMap<const Function *, CostType> &FnCosts,
+                           const Function *F)
+      : F(F) {
+    addAllDependencies(SCG, *F, Dependencies);
+
+    // F's own cost is fetched with at(), the dependencies' costs with
+    // lookup(). NOTE(review): presumably F must have a cost entry while a
+    // dependency without one is tolerated; confirm.
+    TotalCost = FnCosts.at(F);
+    for (const auto *Dep : Dependencies) {
+      TotalCost += FnCosts.lookup(Dep);
+    }
+  }
+
+  // The function this entry was built for.
+  const Function *F = nullptr;
+  // Functions collected by addAllDependencies(); F itself is not included.
+  DenseSet<const Function *> Dependencies;
+  // Cost of F plus the cost of every entry in Dependencies.
+  CostType TotalCost = 0;
+};
+
 /// Splits the module M into N linkable partitions. The function ModuleCallback
 /// is called N times passing each individual partition as the MPart argument.
 class SplitModuleCG {
@@ -21,12 +185,28 @@ class SplitModuleCG {
                    const llvm::lto::Config &C);
 
   unsigned getPartitionNum() { return N; }
+  StringSet<> &getOriginalExternals() { return OriginalExternals; }
+  StringMap<std::string> &getPromotedRenames() { return PromotedRenames; }
 
-  private:
+private:
   unsigned N;
   Module &M;
   CallGraph CG;
+  std::unique_ptr<SimplifyCallGraph> SCG;
+  CostType ModuleCost;
   DenseSet<const Function *> EntryFuncs;
+  StringSet<> OriginalExternals;
+  StringMap<std::string> PromotedRenames;
+  DenseMap<const Function *, bool> externalFunction;
+  DenseMap<const Function *, CostType> FuncsCosts;
+  SmallVector<FunctionWithDependencies> FWDWorkList;
+
+  void calculateFunctionCosts();
+  std::vector<DenseSet<const Function *>> doPartitioning();
+  void dealWithMpart(
+      Module &MPart, unsigned I,
+      function_ref<bool(const GlobalValue *)> NeedsConservativeImport);
+  void createWorkList();
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 11200ade0e8c0..aa1213e5e6af1 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -658,6 +658,16 @@ static bool splitOptAndCodeGenThin(unsigned task, const 
Config &C,
         cgdata::saveModuleForTwoRounds(*MPart, task + CurrentThreadId,
                                        IRAddStream);
     }
+    
+    // Rename the GlobalValues that were promoted from internal to external
+    // linkage, so that the partitions do not end up with duplicate symbols.
+    // The map is bound by reference: getPromotedRenames() returns a
+    // reference, and a plain `auto` would copy the whole StringMap once per
+    // partition.
+    const auto &PromotedRenames = SplitModuleCG.getPromotedRenames();
+    for (auto &GV : MPart->global_values()) {
+      if (auto It = PromotedRenames.find(GV.getName());
+          It != PromotedRenames.end()) {
+        GV.setName(It->second);
+      }
+    }
 
     auto splitStream = [&](unsigned task, const Twine &moduleName)
         -> Expected<std::unique_ptr<CachedFileStream>> {
diff --git a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp 
b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
index 9f57cb3ed566e..debdddfb79041 100644
--- a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
+++ b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
@@ -1,26 +1,381 @@
 #include "llvm/Transforms/Utils/SplitModuleCG.h"
-
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <thread>
 using namespace llvm;
 
 #define DEBUG_TYPE "split-module-CG"
 
+namespace {
+
+static cl::opt<bool> enablePrintSimplifyCallGraph(
+    "enable-print-simplify-callgraph", cl::Hidden, cl::init(false),
+    cl::desc("print SimplifyCallGraph"));
+
+using PartitionID = unsigned;
+
+static void externalize(GlobalValue *GV) {
+  if (GV->hasLocalLinkage()) {
+    GV->setLinkage(GlobalValue::ExternalLinkage);
+    GV->setVisibility(GlobalValue::HiddenVisibility);
+  }
+
+  // Unnamed entities must be named consistently between modules. setName will
+  // give a distinct name to each such entity.
+  if (!GV->hasName())
+    GV->setName("__llvmsplit_unnamed");
+}
+
+} // namespace
+
+std::vector<DenseSet<const Function *>> SplitModuleCG::doPartitioning() {
+  LLVM_DEBUG(dbgs() << "\n--Partitioning Starts--\n");
+  // Performs all of the partitioning work on M.
+  std::vector<DenseSet<const Function *>> Partitions;
+  Partitions.resize(N);
+  if (N == 0)
+    return Partitions;
+
+  auto ComparePartitions = [](const std::pair<PartitionID, CostType> &a,
+                              const std::pair<PartitionID, CostType> &b) {
+    // When two partitions have the same cost, assign to the one with the
+    // biggest ID first. This allows us to put things in P0 last, because P0
+    // may have other stuff added later.
+    if (a.second == b.second)
+      return a.first < b.first;
+    return a.second > b.second;
+  };
+
+  std::vector<std::pair<PartitionID, CostType>> BalancingQueue;
+  for (unsigned I = 0; I < N; ++I)
+    BalancingQueue.emplace_back(I, 0);
+
+  // Helper function to handle assigning a function to a partition. This takes
+  // care of updating the balancing queue.
+  const auto AssignToPartition = [&](PartitionID PID,
+                                     const FunctionWithDependencies &FWD) {
+    auto &FnsInPart = Partitions[PID];
+    FnsInPart.insert(FWD.F);
+    for (const Function *Dep : FWD.Dependencies) {
+      FnsInPart.insert(Dep);
+    }
+
+    // Update the balancing queue. We scan backwards because in the common case
+    // the partition is at the end.
+    for (auto &[QueuePID, Cost] : reverse(BalancingQueue)) {
+      if (QueuePID == PID) {
+        CostType NewCost = 0;
+        for (auto *Fn : Partitions[PID])
+          NewCost += FuncsCosts.at(Fn);
+        Cost = NewCost;
+      }
+    }
+
+    sort(BalancingQueue, ComparePartitions);
+  };
+
+  for (auto &CurFn : FWDWorkList) {
+    // Normal "load-balancing", assign to partition with least pressure.
+    auto [PID, CurCost] = BalancingQueue.back();
+    AssignToPartition(PID, CurFn);
+  }
+
+  return Partitions;
+}
+
+void SplitModuleCG::calculateFunctionCosts() {
+  ModuleCost = 0;
+  for (auto &Fn : M) {
+    if (Fn.isDeclaration())
+      continue;
+
+    CostType FnCost = 0;
+    for (const auto &BB : Fn) {
+      CostType CostVal = std::distance(BB.begin(), BB.end());
+      FnCost += CostVal;
+    }
+    assert(FnCost != 0);
+    FuncsCosts[&Fn] = FnCost;
+    assert((ModuleCost + FnCost) >= ModuleCost && "Overflow!");
+    ModuleCost += FnCost;
+  }
+}
+
+void SplitModuleCG::dealWithMpart(Module &MPart, unsigned I,
+                                  function_ref<bool(const GlobalValue *)> 
NeedsConservativeImport) {
+  // collect symbols to rename
+  auto checkPromoted = [&](const GlobalValue &GV) {
+    // Now external (non-local), but absent from the original externals set.
+    if (!GV.hasLocalLinkage() && !OriginalExternals.contains(GV.getName())) {
+      if (PromotedRenames.count(GV.getName()))
+        return;
+      MD5 Hash;
+      Hash.update(M.getModuleIdentifier());
+      MD5::MD5Result Result;
+      Hash.final(Result);
+      SmallString<32> HashStr;
+      MD5::stringifyResult(Result, HashStr);
+      std::string NewName = (GV.getName() + "." + HashStr.str().substr(0, 
8)).str();
+      PromotedRenames[GV.getName()] = NewName;
+    }
+  };
+
+  auto AvailableExternalizeFunc = [&](llvm::Function &Func) {
+    Func.setLinkage(GlobalValue::AvailableExternallyLinkage);
+    Func.setComdat(nullptr);
+  };
+
+  for (const auto &GV : MPart.global_values())
+    checkPromoted(GV);
+  // Clean-up conservatively imported GVs without any users.
+  for (auto &GV : make_early_inc_range(MPart.globals())) {
+    if (NeedsConservativeImport(&GV) && GV.use_empty())
+      GV.eraseFromParent();
+  }
+
+  for (auto &func : MPart.functions()) {
+    auto Fn = M.getFunction(func.getName());
+    if (externalFunction.count(Fn) && !func.isDeclaration()) {
+      if (!externalFunction[Fn]) {
+        AvailableExternalizeFunc(func);
+      } else {
+        externalFunction[Fn] = false;
+      }
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << MPart.getModuleIdentifier() << "  : \n");
+  for (auto &F : MPart) {
+    if (!F.isDeclaration())
+      LLVM_DEBUG(dbgs() << "   [Function: ] " << I << "  " << F.getName() << " 
"
+                        << F.getLinkage() << "\n");
+  }
+}
+
+void SplitModuleCG::createWorkList() {
+  // First, find all the entry functions with an in-degree of 0
+  // (i.e., those that are not called by any function).
+  for (auto &NodePair : *SCG) {
+    SimplifyCallGraphNode *SCGNode = NodePair.second.get();
+    Function *F = SCGNode->getFunction();
+    if (F && SCGNode->getNumReferences() == 0) {
+      EntryFuncs.insert(F);
+    }
+  }
+
+  // Second, find all the dependencies of each entry function.
+  for (auto *F : EntryFuncs) {
+    FWDWorkList.emplace_back(*SCG, FuncsCosts, F);
+  }
+
+  // Third, find all the functions that are not in the worklist.
+  DenseSet<const Function *> SeenFunctions;
+  for (const auto &FWD : FWDWorkList) {
+    SeenFunctions.insert(FWD.F);
+    SeenFunctions.insert(FWD.Dependencies.begin(), FWD.Dependencies.end());
+  }
+  for (auto &F : M) {
+    // This function may be part of a cycle and therefore not be a dependency
+    // of any root; it is treated as a root function here.
+    if (!F.isDeclaration() && !SeenFunctions.count(&F)) {
+      FWDWorkList.emplace_back(*SCG, FuncsCosts, &F);
+      auto &FWD = FWDWorkList.back();
+      EntryFuncs.insert(&F);
+      SeenFunctions.insert(FWD.F);
+      SeenFunctions.insert(FWD.Dependencies.begin(), FWD.Dependencies.end());
+    }
+  }
+
+  // Sort the worklist so the most expensive roots are seen first.
+  sort(FWDWorkList, [&](auto &A, auto &B) {
+    // Sort by total cost, and if the total cost is identical, sort
+    // alphabetically
+    if (A.TotalCost == B.TotalCost)
+      return A.F->getName() < B.F->getName();
+    return A.TotalCost > B.TotalCost;
+  });
+
+  LLVM_DEBUG(dbgs() << "Number of callgraphs to be allocated: "
+                    << FWDWorkList.size() << "   Module cost: "
+                    << ModuleCost << "\n");
+  LLVM_DEBUG(dbgs() << "callgraphs: \n");
+  for (auto FWD : FWDWorkList) {
+    LLVM_DEBUG(dbgs() << "[root] " << FWD.F->getName() << " (totalCost:"
+                      << FWD.TotalCost << ";   root function cost: "
+                      << FuncsCosts[FWD.F] << ";   has dependency: "
+                      << FWD.Dependencies.size() << "\n");
+  }
+}
+
 void SplitModuleCG::SplitModule(ModuleCreationCallback ModuleCallback,
                                 const llvm::lto::Config &C) {
-  // TODO: 1. Process the linkage of the GlobalValue; 2. Allocate the callgraph
-  // to N partitions; 3.Invoke the cloneModule API to copy the N partitions to
-  // obtain MParts.
+  for (Function &F : M) {
+    if (F.hasLocalLinkage() && F.hasOneUse() && !F.hasAddressTaken())
+      continue;
+    externalize(&F);
+    if (!F.isDeclaration() &&
+        (F.hasExternalLinkage() || !F.isDefinitionExact()))
+      externalFunction[&F] = true;
+  }
+  for (GlobalVariable &GV : M.globals())
+    externalize(&GV);
+  for (GlobalAlias &GA : M.aliases())
+    externalize(&GA);
+  for (GlobalIFunc &GI : M.ifuncs())
+    externalize(&GI);
 
+  // TODO: Consider optimizing the alias, replacing the determined alias with
+  // the determined aliasee.
+
+  // Assign callgraphs into N partitions.
+  auto Partitions = doPartitioning();
+  assert(Partitions.size() == N);
+
+  // Local GVs need to be conservatively imported, as dependencies, into every
+  // module, and then cleaned up afterwards.
+  const auto NeedsConservativeImport = [&](const GlobalValue *GV) {
+    // We conservatively import private/internal GVs into every module and
+    // clean them up afterwards.
+    const auto *Var = dyn_cast<GlobalVariable>(GV);
+    return Var && Var->hasLocalLinkage();
+  };
+
+  auto ShouldCloneDefinition = [&](unsigned I, const GlobalValue *GV) {
+    const auto &FnsInPart = Partitions[I];
+
+    // Functions go in their assigned partition.
+    if (const auto *newFn = dyn_cast<Function>(GV)) {
+      const auto *Fn = M.getFunction(newFn->getName());
+      return FnsInPart.contains(Fn);
+    }
+    if (NeedsConservativeImport(GV))
+      return true;
+    // Everything else goes in the first partition.
+    return I == 0;
+  };
+
+  // TODO: Consider also running CloneModule in parallel to reduce compilation
+  // time.
+  std::vector<std::thread> Threads;
+  Threads.reserve(N);
+  std::vector<std::unique_ptr<Module>> MPartInCtxs;
+  MPartInCtxs.resize(N);
+  for (unsigned I = 0; I < N; ++I) {
+    ValueToValueMapTy VMap;
+    std::unique_ptr<Module> MPart(
+      CloneModule(M, VMap, [&](const GlobalValue *GV) {
+        return ShouldCloneDefinition(I, GV);
+    }));
+
+    dealWithMpart(*MPart, I, NeedsConservativeImport);
+
+    // CloneModule is not run on multiple threads here, so each partition must
+    // additionally be cloned into a new context (via a bitcode round trip) to
+    // avoid data races.
+    SmallString<0> BC;
+    raw_svector_ostream BCOS(BC);
+    WriteBitcodeToFile(*MPart, BCOS);
+    MPart.reset();
+    Threads.emplace_back([&, I](SmallString<0> BC) {
+      llvm::lto::LTOLLVMContext Ctx(C);
+      Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
+          MemoryBufferRef(BC.str(), "ld-temp.o"), Ctx);
+      BC = SmallString<0>();
+      if (!MOrErr)
+        report_fatal_error("Failed to read bitcode");
+      ModuleCallback(std::move(MOrErr.get()), I);
+    }, std::move(BC));
+  }
+  for (auto &T : Threads)
+    T.join();
 }
 
 SplitModuleCG::SplitModuleCG(Module &M,
                              const ModuleSummaryIndex &CombinedIndex,
                              unsigned LimitPartition)
     : M(M), CG(M), N(LimitPartition) {
-  // TODO: The module is split based on the callgraph, and EntryFuncs stores
-  // the root function of each callgraph.
+  // Track existing non-local symbols. This ensures that when we promote
+  // internal symbols to external for partitioning, we can handle renaming
+  // and avoid conflicts.
+  for (const auto &GV : M.global_values())
+    if (!GV.hasLocalLinkage())
+      OriginalExternals.insert(GV.getName());
+
+  calculateFunctionCosts();
+
+  // Construct a simplified call graph to facilitate worklist generation.
+  SCG = std::make_unique<SimplifyCallGraph>(CG, CombinedIndex, M);
+  // TODO: When the SCG is established, the special cases of comdat and
+  // initarray need to be considered.
+
+  // Populate the worklist with root functions and their transitive
+  // dependencies. This worklist serves as the foundation for the
+  // subsequent module partitioning.
+  createWorkList();
 
   if (N == 0 || N > EntryFuncs.size()) {
     N = EntryFuncs.size();
   }
   N = N == 0 ? 1 : N;
 }
+
+void SimplifyCallGraph::createSimplifyCallGraph(
+    const ModuleSummaryIndex &CombinedIndex) {
+  for (auto &NodePair : CG) {
+    CallGraphNode *CGNode = NodePair.second.get();
+    Function *F = CGNode->getFunction();
+    if (!F || F->isDeclaration())
+      continue;
+
+    SimplifyCallGraphNode *SCGNode = getOrInsertFunction(F);
+
+    // TODO: Trace indirect call usage for the current function.
+
+    for (const auto &CGNodeItem : *CGNode) {
+      Function *Called = CGNodeItem.second->getFunction();
+      if (!Called) {
+        // TODO: Deal with indirect calls.
+        // 1. Check if the instruction has a callees metadata.
+        // 2. Check if this is an indirect call with profile data.
+        // 3. Check if this is an alias to a function.
+      }
+      if (!Called || Called->isDeclaration())
+        continue;
+      SCGNode->addCalledFunction(getOrInsertFunction(Called));
+    }
+  }
+
+  if (enablePrintSimplifyCallGraph)
+    print();
+}
+
+
+void SimplifyCallGraph::print() {
+  for (auto &SCGItem : FunctionMap) {
+    LLVM_DEBUG(dbgs() << "Call graph node for function: '"
+                      << SCGItem.first->getName() << "' #uses="
+                      << SCGItem.second->getNumReferences() << "\n");
+
+    for (const auto &callee : *SCGItem.second) {
+      LLVM_DEBUG(dbgs() <<"          Calls function : '"
+                        << callee->getFunction()->getName() << " '\n");
+    }
+  }
+}
+
+SimplifyCallGraphNode *
+SimplifyCallGraph::getOrInsertFunction(const Function *F) {
+  auto &SCGN = FunctionMap[F];
+  if (SCGN)
+    return SCGN.get();
+
+  SCGN =
+      std::make_unique<SimplifyCallGraphNode>(this, const_cast<Function *>(F));
+  return SCGN.get();
+}

>From 88db8d4e7fbcadc73e1c48c23bb8781b2c21df4f Mon Sep 17 00:00:00 2001
From: maojiaping <[email protected]>
Date: Wed, 20 May 2026 15:57:13 +0800
Subject: [PATCH 3/6] [llvm-split][SplitModuleCG] Add support for SplitModuleCG

Add a new command line option --enable-split-module-CG to llvm-split
tool for testing the SplitModuleCG utility.

This change:
- Adds the --enable-split-module-CG flag
- Wires up the SplitModuleCG interface in llvm-split
---
 .../SplitModuleCG/split-promoted-rename.ll    | 41 +++++++++++++++++++
 .../SplitModuleCG/function-with-ring.ll       | 36 ++++++++++++++++
 .../llvm-split/SplitModuleCG/function.ll      | 35 ++++++++++++++++
 .../llvm-split/SplitModuleCG/partition-cap.ll | 10 +++++
 .../SplitModuleCG/single-partition.ll         | 13 ++++++
 .../tools/llvm-split/SplitModuleCG/unnamed.ll |  8 ++++
 llvm/tools/llvm-split/llvm-split.cpp          | 36 ++++++++++++++++
 7 files changed, 179 insertions(+)
 create mode 100644 llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll
 create mode 100644 
llvm/test/tools/llvm-split/SplitModuleCG/function-with-ring.ll
 create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/function.ll
 create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/partition-cap.ll
 create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/single-partition.ll
 create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/unnamed.ll

diff --git a/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll 
b/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll
new file mode 100644
index 0000000000000..6c51141a9ad85
--- /dev/null
+++ b/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll
@@ -0,0 +1,41 @@
+; Test that internal symbols promoted during module splitting are consistently
+; renamed with an MD5 suffix across all partitions.
+;
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: llvm-lto2 run %t.bc -o %t \
+; RUN:   -thinlto-split=true \
+; RUN:   -thinlto-split-partitions=2 -thinlto-split-module-size-threshold=0 \
+; RUN:   -r=%t.bc,caller_a,px \
+; RUN:   -r=%t.bc,caller_b,px
+; RUN: llvm-nm %t.1 | FileCheck %s
+
+; CHECK-DAG: T caller_a
+; CHECK-DAG: T caller_b
+; CHECK:     T {{.*promoted_internal[._][0-9a-f]+.*}}
+; CHECK-NOT: T promoted_internal{{$}}
+
+target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; @promoted_internal is internal. SplitModuleCG::dealWithMpart's checkPromoted
+; records it in PromotedRenames. splitOptAndCodeGenThin applies the rename
+; after opt via:
+;   for (auto &GV : MPart->global_values())
+;     if (auto It = PromotedRenames.find(GV.getName()); ...)
+;       GV.setName(It->second);
+define internal void @promoted_internal() {
+entry:
+  ret void
+}
+
+define void @caller_a() {
+entry:
+  call void @promoted_internal()
+  ret void
+}
+
+define void @caller_b() {
+entry:
+  call void @promoted_internal()
+  ret void
+}
diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/function-with-ring.ll 
b/llvm/test/tools/llvm-split/SplitModuleCG/function-with-ring.ll
new file mode 100644
index 0000000000000..f2fc8c03c922a
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SplitModuleCG/function-with-ring.ll
@@ -0,0 +1,36 @@
+; RUN: llvm-split -enable-split-module-CG=true -j2 -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0-DAG: declare void @foo()
+; CHECK0-DAG: define void @bar()
+; CHECK0-DAG: declare void @call_foo()
+; CHECK0-DAG: define void @call_bar()
+
+; CHECK1-DAG: define void @foo()
+; CHECK1-DAG: declare void @bar()
+; CHECK1-DAG: define void @call_foo()
+; CHECK1-DAG: declare void @call_bar()
+
+define void @foo() {
+entry:
+  call void @call_foo()
+  ret void
+}
+
+define void @bar() {
+entry:
+  ret void
+}
+
+define void @call_foo() {
+entry:
+  call void @foo()
+  ret void
+}
+
+define void @call_bar() {
+entry:
+  call void @bar()
+  ret void
+}
diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/function.ll 
b/llvm/test/tools/llvm-split/SplitModuleCG/function.ll
new file mode 100644
index 0000000000000..ddf5bb5c3dff3
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SplitModuleCG/function.ll
@@ -0,0 +1,35 @@
+; RUN: llvm-split -enable-split-module-CG=true -j2 -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0-DAG: declare dso_local void @foo()
+; CHECK0-DAG: define void @bar()
+; CHECK0-DAG: declare void @func_a()
+; CHECK0-DAG: define void @func_b()
+; CHECK1-DAG: define internal void @foo()
+; CHECK1-DAG: define available_externally void @bar()
+; CHECK1-DAG: define void @func_a()
+; CHECK1-DAG: declare void @func_b()
+
+define internal void @foo() {
+entry:
+  ret void
+}
+
+define void @bar() {
+entry:
+  ret void
+}
+
+define void @func_a() {
+entry:
+  call void @foo()
+  call void @bar()
+  ret void
+}
+
+define void @func_b() {
+entry:
+  call void @bar()
+  ret void
+}
diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/partition-cap.ll 
b/llvm/test/tools/llvm-split/SplitModuleCG/partition-cap.ll
new file mode 100644
index 0000000000000..5c3ced3e682af
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SplitModuleCG/partition-cap.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-split -enable-split-module-CG=true -j10 -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+; should only produce 2 output files (N capped to EntryFuncs.size()=2)
+
+; CHECK0: define void @foo()
+; CHECK1: define void @bar()
+
+define void @foo() { ret void }
+define void @bar() { ret void }
diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/single-partition.ll 
b/llvm/test/tools/llvm-split/SplitModuleCG/single-partition.ll
new file mode 100644
index 0000000000000..fdfdf910a3498
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SplitModuleCG/single-partition.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-split -enable-split-module-CG=true -j1 -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+
+; CHECK0: define void @foo()
+; CHECK0: define void @bar()
+
+define void @foo() {
+  call void @bar()
+  ret void
+}
+define void @bar() {
+  ret void
+}
diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/unnamed.ll 
b/llvm/test/tools/llvm-split/SplitModuleCG/unnamed.ll
new file mode 100644
index 0000000000000..73f7079669c55
--- /dev/null
+++ b/llvm/test/tools/llvm-split/SplitModuleCG/unnamed.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-split -enable-split-module-CG=true -j2 -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+
+; CHECK0-DAG: define hidden void @__llvmsplit_unnamed()
+
+define internal void @0() {
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/tools/llvm-split/llvm-split.cpp 
b/llvm/tools/llvm-split/llvm-split.cpp
index 4cc4fd945fc53..4156222855617 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -18,8 +18,10 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/PassInstrumentation.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRReader/IRReader.h"
+#include "llvm/LTO/Config.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -35,6 +37,7 @@
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
 #include "llvm/Transforms/Utils/SplitModuleByCategory.h"
+#include "llvm/Transforms/Utils/SplitModuleCG.h"
 
 using namespace llvm;
 
@@ -76,6 +79,10 @@ static cl::opt<std::string>
 static cl::opt<std::string>
     MCPU("mcpu", cl::desc("Target CPU, ignored if --mtriple is not used"),
          cl::value_desc("cpu"), cl::cat(SplitCategory));
+         
+static cl::opt<bool>
+    EnableSplitModuleCG("enable-split-module-CG", cl::Prefix, cl::init(false),
+     cl::desc("Split module using call graph"), cl::cat(SplitCategory));
 
 enum class SplitByCategoryType {
   SBCT_ByAttribute,
@@ -327,6 +334,35 @@ int main(int argc, char **argv) {
               "splitModule implementation\n";
   }
 
+  if (EnableSplitModuleCG) {
+    const auto HandleModulePartCG = [&](std::unique_ptr<Module> MPart, 
unsigned I) {
+      std::error_code EC;
+      std::unique_ptr<ToolOutputFile> Out(
+          new ToolOutputFile(OutputFilename + utostr(I), EC, 
sys::fs::OF_None));
+      if (EC) {
+        errs() << EC.message() << '\n';
+        exit(1);
+      }
+
+      if (verifyModule(*MPart, &errs())) {
+        errs() << "Broken module!\n";
+        exit(1);
+      }
+
+      WriteBitcodeToFile(*MPart, Out->os());
+
+      // Declare success.
+      Out->keep();
+    };
+
+    llvm::lto::Config Config;
+    ModuleSummaryIndex CombinedIndex(false);
+    SplitModuleCG SplitModuleCG(*M, CombinedIndex, NumOutputs);
+    SplitModuleCG.SplitModule(HandleModulePartCG, Config);
+    return 0;
+  }
+
   SplitModule(*M, NumOutputs, HandleModulePart, PreserveLocals, RoundRobin);
   return 0;
 }
+

>From 073f4c1ce6305c1867ca607db45c66843e392f63 Mon Sep 17 00:00:00 2001
From: maojiaping <[email protected]>
Date: Fri, 12 Jun 2026 15:17:07 +0800
Subject: [PATCH 4/6] [SplitModuleCG] Fix warning errors

- Remove unused variable.
- Fix constructor initialization order to match class
  declaration order (N, M, CG).
---
 llvm/include/llvm/Transforms/Utils/SplitModuleCG.h | 6 ++----
 llvm/lib/LTO/LTOBackend.cpp                        | 1 -
 llvm/lib/Transforms/Utils/SplitModuleCG.cpp        | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h 
b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
index 956a1ea8030fe..9836376b94a82 100644
--- a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
+++ b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h
@@ -26,7 +26,7 @@ class SimplifyCallGraph {
   explicit SimplifyCallGraph(CallGraph &CG,
                              const ModuleSummaryIndex &CombinedIndex,
                              Module &M)
-      : CG(CG), M(M) {
+      : CG(CG) {
     createSimplifyCallGraph(CombinedIndex);
   }
   ~SimplifyCallGraph() {};
@@ -74,14 +74,13 @@ class SimplifyCallGraph {
 
 private:
   CallGraph &CG;
-  Module &M;
 };
 
 class SimplifyCallGraphNode {
 public:
   using CalledFunctionsSet = DenseSet<SimplifyCallGraphNode *>;
   inline SimplifyCallGraphNode(SimplifyCallGraph *SCG, Function *F)
-      : SCG(SCG), F(F) {}
+      : F(F) {}
 
   SimplifyCallGraphNode(const SimplifyCallGraphNode &) = delete;
   SimplifyCallGraphNode &operator=(const SimplifyCallGraphNode &) = delete;
@@ -118,7 +117,6 @@ class SimplifyCallGraphNode {
 private:
   friend class SimplifyCallGraph;
 
-  SimplifyCallGraph *SCG;
   Function *F;
 
   DenseSet<SimplifyCallGraphNode *> CalledFunctions;
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index aa1213e5e6af1..cfe196a74e1b2 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -609,7 +609,6 @@ static bool splitOptAndCodeGenThin(unsigned task, const 
Config &C,
                                    const std::vector<uint8_t> &CmdArgs,
                                    bool DoOpt, AddStreamFn IRAddStream,
                                    ArrayRef<StringRef> &BitcodeLibFuncs) {
-  unsigned ThreadCount = 0;
   const Target *T = &TM->getTarget();
 
   static std::mutex PrintMutex;
diff --git a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp 
b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
index debdddfb79041..c50111204e1f0 100644
--- a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
+++ b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp
@@ -299,7 +299,7 @@ void SplitModuleCG::SplitModule(ModuleCreationCallback 
ModuleCallback,
 SplitModuleCG::SplitModuleCG(Module &M,
                              const ModuleSummaryIndex &CombinedIndex,
                              unsigned LimitPartition)
-    : M(M), CG(M), N(LimitPartition) {
+    : N(LimitPartition), M(M), CG(M) {
   // Track existing non-local symbols. This ensures that when we promote
   // internal symbols to external for partitioning, we can handle renaming
   // and avoid conflicts.

>From 76ed2093f94ceed85d596fb4371326c784bb7c3e Mon Sep 17 00:00:00 2001
From: maojiaping <[email protected]>
Date: Mon, 22 Jun 2026 09:09:02 +0800
Subject: [PATCH 5/6] [ThinLTO][SplitModuleCG] Trim non-core code

---
 llvm/lib/LTO/LTOBackend.cpp                   | 199 +-----------------
 .../SplitModuleCG/split-promoted-rename.ll    |  41 ----
 2 files changed, 10 insertions(+), 230 deletions(-)
 delete mode 100644 llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll

diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index cfe196a74e1b2..2608e2eb54398 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -84,16 +84,6 @@ static cl::list<std::string>
                              "path matches this for -save-temps options"),
                     cl::CommaSeparated, cl::Hidden);
 
-static cl::opt<unsigned> ThinLTOSplitModuleSizeThreshold(
-    "thinlto-split-module-size-threshold", cl::Hidden, cl::init(500),
-    cl::desc("Control the amount of whether split in thinlto backend"
-             "accroding to the size of a module."));
-
-static cl::opt<float> ThinLTOSplitModuleSizeRateThreshold(
-    "thinlto-split-module-size-rate-threshold", cl::Hidden, cl::init(0.5),
-    cl::desc("Whether to split in thinlto backend based on the ratio of "
-             "(callgraph size)/(module size)"));
-
 static cl::opt<unsigned> ThinLTOSplitPartitions(
     "thinlto-split-partitions", cl::Hidden, cl::init(0),
     cl::desc("Control split to how many partitions in thinlto backend."));
@@ -541,66 +531,6 @@ static void codegen(const Config &Conf, TargetMachine *TM,
     report_fatal_error(std::move(Err));
 }
 
-static unsigned calFunctionSize(const llvm::Function &F) {
-  unsigned size = 0;
-  for (const auto &BB : F)
-    size += std::distance(BB.begin(), BB.end());
-  return size;
-}
-
-static unsigned calModuleSize(const llvm::Module &M) {
-  unsigned size = 0;
-  for (const auto &F : M)
-    size += calFunctionSize(F);
-  return size;
-}
-
-static bool canDoSplitModule(const llvm::Module &M) {
-  if (calModuleSize(M) < ThinLTOSplitModuleSizeThreshold)
-    return false;
-  return true;
-}
-
-static bool HasLargeCG(Module &Mod, const ModuleSummaryIndex &CombinedIndex) {
-  // TODO: Check whether there has large callgraphs. When multiple callgraphs
-  // are split, thinlto parallel compilation can bring benefits.
-  return true;
-}
-
-struct TaskIdAllocator {
-  using TaskId = unsigned;
-
-  // Use the most significant bit (MSB) as a namespace tag.
-  // - Original ThinLTO backend tasks are expected to have MSB == 0.
-  // - Split partitions allocated by this allocator always have MSB == 1.
-  // This guarantees the two ID spaces never overlap.
-  static constexpr TaskId tag() {
-    return TaskId{1} << (std::numeric_limits<TaskId>::digits - 1);
-  }
-
-  // Monotonic sequence counter for split partitions (MSB must remain 0 here).
-  std::atomic<TaskId> seq{0};
-
-  // Allocate a globally unique TaskId for a split partition.
-  // The returned ID is `tag() | seq`, so it lives in the MSB==1 namespace.
-  TaskId alloc() {
-    TaskId v = seq.fetch_add(1, std::memory_order_relaxed);
-
-    // If the counter ever reaches the MSB, we'd overlap namespaces.
-    // This indicates an overflow / too many partitions.
-    if (v & tag())
-      report_fatal_error("Partition TaskId overflow: seq reached the tag 
bit.");
-
-    return tag() | v;
-  }
-
-  // Helper for sanity checks / debugging.
-  static bool isPartition(TaskId id) { return (id & tag()) != 0; }
-};
-
-// Global allocator shared by all split partitions.
-static TaskIdAllocator gSplitTaskIds;
-
 static bool splitOptAndCodeGenThin(unsigned task, const Config &C,
                                    TargetMachine *TM, AddStreamFn AddStream,
                                    unsigned ParallelCodeGenParallelismLevel,
@@ -611,39 +541,15 @@ static bool splitOptAndCodeGenThin(unsigned task, const 
Config &C,
                                    ArrayRef<StringRef> &BitcodeLibFuncs) {
   const Target *T = &TM->getTarget();
 
-  static std::mutex PrintMutex;
-
   SplitModuleCG SplitModuleCG(Mod, CombinedIndex, 
ParallelCodeGenParallelismLevel);
   ParallelCodeGenParallelismLevel = SplitModuleCG.getPartitionNum();
 
-  std::vector<std::string> TempObjectFiles(ParallelCodeGenParallelismLevel);
-  std::vector<llvm::FileRemover> 
TempFileRemovers(ParallelCodeGenParallelismLevel);
-
   const auto HandleModulePartition = [&](std::unique_ptr<Module> MPart,
                                          unsigned PartitionId) {
-    unsigned CurrentThreadId, UniqueTaskId;
-    {
-      std::lock_guard<std::mutex> Lock(PrintMutex);
-      CurrentThreadId = ThreadCount++;
-
-      // In distributed ThinLTO, `task` may be a sentinel (e.g. -1 cast to
-      // unsigned), which becomes UINT_MAX and naturally has MSB==1. Treat it
-      // as "no base task id" and don't enforce the namespace check on it.
-      //
-      // We do not rely on the incoming `task` for partition uniqueness: split
-      // partitions get a dedicated UniqueTaskId allocated below.
-      if (task != std::numeric_limits<unsigned>::max()) {
-        assert(!TaskIdAllocator::isPartition(task) &&
-               "Original ThinLTO TaskId unexpectedly overlaps the partition "
-               "namespace");
-      }
-      UniqueTaskId = gSplitTaskIds.alloc();
-    }
-
     std::unique_ptr<TargetMachine> ThreadTM = createTargetMachine(C, T, 
*MPart);
 
     if (DoOpt) {
-      if (!opt(C, ThreadTM.get(), UniqueTaskId, *MPart, /*IsThinLTO=*/true,
+      if (!opt(C, ThreadTM.get(), PartitionId, *MPart, /*IsThinLTO=*/true,
                /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex,
                CmdArgs, BitcodeLibFuncs)) {
         report_fatal_error("Failed to gen opt for split mod in thread.");
@@ -654,7 +560,7 @@ static bool splitOptAndCodeGenThin(unsigned task, const 
Config &C,
       // running `opt()`. We're not reaching here as it's bailed out earlier
       // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`.
       if (IRAddStream)
-        cgdata::saveModuleForTwoRounds(*MPart, task + CurrentThreadId,
+        cgdata::saveModuleForTwoRounds(*MPart, PartitionId,
                                        IRAddStream);
     }
     
@@ -668,91 +574,18 @@ static bool splitOptAndCodeGenThin(unsigned task, const 
Config &C,
       }
     }
 
-    auto splitStream = [&](unsigned task, const Twine &moduleName)
-        -> Expected<std::unique_ptr<CachedFileStream>> {
-      int FD;
-      SmallString<128> TempFilename;
-      if (std::error_code EC = sys::fs::createTemporaryFile(
-              "thinlto-split", "o", FD, TempFilename))
-        return errorCodeToError(EC);
-
-      TempObjectFiles[PartitionId] = std::string(TempFilename.str());
-      TempFileRemovers[PartitionId].setFile(TempObjectFiles[PartitionId]);
-
-      auto OS = std::make_unique<raw_fd_ostream>(
-          FD, true, /*CloseOnDestruct*/true);
-
-      auto Stream = std::make_unique<CachedFileStream>(
-          std::move(OS), std::string(TempFilename.str()));
-
-      return std::move(Stream);
-    };
-
-    codegen(C, ThreadTM.get(), splitStream, UniqueTaskId, *MPart,
+    // FIXME: For distributed ThinLTO, the current 'AddStream' callback needs
+    // to be reconstructed to support emitting multiple split submodules.
+    codegen(C, ThreadTM.get(), AddStream, PartitionId, *MPart,
             CombinedIndex);
   };
 
   SplitModuleCG.SplitModule(HandleModulePartition, C);
 
-  // Use ld.lld to combine the partitions into a object.
-  if (TempObjectFiles.empty()) {
-    llvm::errs() << "TempObjectFiles.empty()\n";
-    return true;
-  }
-
-  auto FinalStream = AddStream(task, Mod.getModuleIdentifier());
-  if (!FinalStream)
-    report_fatal_error("Failed to open final output stream");
-
-  int MergedFD;
-  SmallString<128> MergedFilename;
-  if (sys::fs::createTemporaryFile("thinlto-merged", "o", MergedFD,
-                                   MergedFilename))
-    report_fatal_error("Failed to create merged temp file.");
-  llvm::FileRemover MergedFileRemover(MergedFilename);
-  sys::fs::closeFile(MergedFD);
-
-  std::vector<StringRef> Args;
-  std::string LinkerPath = "";
-  if (auto Path = sys::findProgramByName("ld.lld"))
-    LinkerPath = *Path;
-  else if (auto Path = sys::findProgramByName("ld"))
-    LinkerPath = *Path;
-
-  if (LinkerPath.empty())
-    report_fatal_error("Cannot find linkeer (ld or ld.lld) to merge 
partitions.");
-
-  Args.push_back(LinkerPath);
-  Args.push_back("-r");
-  Args.push_back("-o");
-  Args.push_back(MergedFilename);
-
-  for (const auto &File : TempObjectFiles)
-    Args.push_back(File);
-
-  std::string ErrMsg;
-  int Result = sys::ExecuteAndWait(LinkerPath, Args, /*Env=*/std::nullopt,
-                                   /*Redirects=*/{}, /*SecondsToWait=*/0,
-                                   /*MemoryLimit=*/0, &ErrMsg);
-
-  if (Result != 0) {
-    errs() << "Linker failed: " << ErrMsg << "\n";
-    report_fatal_error("Failed to merge split objects.");
-  }
+  // TODO: After CodeGen emission, an arbitrary number of split submodules will
+  // be generated. These fragments need to be merged before the final link
+  // stage to prevent disruptions to the distributed ThinLTO workflow.
 
-  {
-    std::unique_ptr<CachedFileStream> &FinalFileStream = *FinalStream;
-    auto BufferOrErr = MemoryBuffer::getFile(MergedFilename);
-    if (!BufferOrErr)
-      report_fatal_error("Failed to read merged object.");
-
-    FinalFileStream->OS->write(BufferOrErr.get()->getBufferStart(),
-                               BufferOrErr.get()->getBufferSize());
-    if (Error Err = FinalFileStream->commit()) {
-      report_fatal_error(Twine("Failed to commit final file stream: ") +
-                         toString(std::move(Err)));
-    }
-  }
   return true;
 }
 
@@ -920,21 +753,9 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, 
AddStreamFn AddStream,
   // the module, if applicable.
   Mod.setPartialSampleProfileRatio(CombinedIndex);
 
-  bool ProfitableToSplit = true;
-  if (ThinLTOSplit) {
-    if (!canDoSplitModule(Mod) || !HasLargeCG(Mod, CombinedIndex)) {
-      ProfitableToSplit = false;
-      LLVM_DEBUG(dbgs() << "warning: thinlto split not enable for module: "
-                        << Mod.getName());
-    } else {
-      LLVM_DEBUG(dbgs() << "thinlto: split codegen for module: "
-                        << Mod.getName());
-    }
-  }
-
   LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
   if (CodeGenOnly) {
-    if (ThinLTOSplit && ProfitableToSplit)
+    if (ThinLTOSplit)
       splitOptAndCodeGenThin(Task, Conf, TM.get(), AddStream,
                              ThinLTOSplitPartitions, Mod, CombinedIndex,
                              CmdArgs, false, IRAddStream, BitcodeLibFuncs);
@@ -951,7 +772,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, 
AddStreamFn AddStream,
   auto OptimizeAndCodegen =
       [&](Module &Mod, TargetMachine *TM,
           LLVMRemarkFileHandle DiagnosticOutputFile) {
-        if (ThinLTOSplit && ProfitableToSplit) {
+        if (ThinLTOSplit) {
           if (!splitOptAndCodeGenThin(
                   Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod,
                   CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs))
diff --git a/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll 
b/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll
deleted file mode 100644
index 6c51141a9ad85..0000000000000
--- a/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; Test that internal symbols promoted during module splitting are consistently
-; renamed with an MD5 suffix across all partitions.
-;
-; RUN: opt -module-summary %s -o %t.bc
-; RUN: llvm-lto2 run %t.bc -o %t \
-; RUN:   -thinlto-split=true \
-; RUN:   -thinlto-split-partitions=2 -thinlto-split-module-size-threshold=0 \
-; RUN:   -r=%t.bc,caller_a,px \
-; RUN:   -r=%t.bc,caller_b,px
-; RUN: llvm-nm %t.1 | FileCheck %s
-
-; CHECK-DAG: T caller_a
-; CHECK-DAG: T caller_b
-; CHECK:     T {{.*promoted_internal[._][0-9a-f]+.*}}
-; CHECK-NOT: T promoted_internal{{$}}
-
-target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; @promoted_internal is internal. SplitModuleCG::dealWithMpart's checkPromoted
-; records it in PromotedRenames. splitOptAndCodeGenThin applies the rename
-; after opt via:
-;   for (auto &GV : MPart->global_values())
-;     if (auto It = PromotedRenames.find(GV.getName()); ...)
-;       GV.setName(It->second);
-define internal void @promoted_internal() {
-entry:
-  ret void
-}
-
-define void @caller_a() {
-entry:
-  call void @promoted_internal()
-  ret void
-}
-
-define void @caller_b() {
-entry:
-  call void @promoted_internal()
-  ret void
-}

>From 1c3b0b0271043d2d53fc44b201c54132707c834c Mon Sep 17 00:00:00 2001
From: maojiaping <[email protected]>
Date: Thu, 11 Jun 2026 10:15:49 +0800
Subject: [PATCH 6/6] [LTO][SplitModuleCG] Enable split module by callgraph for
 FullLTO

- Rename ThinLTOSplit to LTOSplitByCG for clarity
- Add IsThinLTO parameter to splitOptAndCodeGenThin with default true
- Enable splitOptAndCodeGenThin for FullLTO via else if branch
---
 .../thinlto-split/fulllto-split-module.c      | 26 +++++++++++
 .../thinlto-split/thinlto-split-module.c      | 34 ++++++++++++++
 llvm/lib/LTO/LTOBackend.cpp                   | 46 +++++++++++--------
 3 files changed, 87 insertions(+), 19 deletions(-)
 create mode 100644 clang/test/CodeGen/thinlto-split/fulllto-split-module.c
 create mode 100644 clang/test/CodeGen/thinlto-split/thinlto-split-module.c

diff --git a/clang/test/CodeGen/thinlto-split/fulllto-split-module.c 
b/clang/test/CodeGen/thinlto-split/fulllto-split-module.c
new file mode 100644
index 0000000000000..b3cf7081ee2e0
--- /dev/null
+++ b/clang/test/CodeGen/thinlto-split/fulllto-split-module.c
@@ -0,0 +1,26 @@
+// UNSUPPORTED: system-windows
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang -flto=full -fuse-ld=lld -shared \
+// RUN:   -o %t.o %s \
+// RUN:   -Wl,-mllvm,-lto-split-by-callgraph=true \
+// RUN:   -Wl,--lto-partitions=2 \
+// RUN:   -Wl,--save-temps=prelink
+// RUN: llvm-nm %t.o.lto.o | FileCheck %s --check-prefix=CHECK0
+// RUN: llvm-nm %t.o.lto.1.o | FileCheck %s --check-prefix=CHECK1
+
+// CHECK0-DAG: T caller_b
+// CHECK0-DAG: T promoted_internal
+
+// CHECK1-DAG: T caller_a
+// CHECK1-DAG: U promoted_internal
+
+static void promoted_internal(void) {}
+
+void caller_a(void) {
+    promoted_internal();
+}
+
+void caller_b(void) {
+    promoted_internal();
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/thinlto-split/thinlto-split-module.c 
b/clang/test/CodeGen/thinlto-split/thinlto-split-module.c
new file mode 100644
index 0000000000000..0725fe49f3e6c
--- /dev/null
+++ b/clang/test/CodeGen/thinlto-split/thinlto-split-module.c
@@ -0,0 +1,34 @@
+// UNSUPPORTED: system-windows
+// REQUIRES: aarch64-registered-target
+
+// Distributed ThinLTO (DTLTO)
+// RUN: %clang -flto=thin -c %s -o %t.o
+// RUN: %clang -flto=thin -fuse-ld=lld -Wl,--thinlto-index-only %t.o
+// RUN: not --crash %clang %t.o -c -fthinlto-index=%t.o.thinlto.bc \
+// RUN:                            -mllvm -lto-split-by-callgraph=true \
+// RUN:                            -mllvm -lto-split-partitions=2
+//
+// Regular ThinLTO
+// RUN: %clang -flto=thin -fuse-ld=lld -shared \
+// RUN:   -o %t.o %s \
+// RUN:   -Wl,-mllvm,-lto-split-by-callgraph=true \
+// RUN:   -Wl,-mllvm,-lto-split-partitions=2 \
+// RUN:   -Wl,--save-temps=prelink
+// RUN: llvm-nm %t.o.lto.o | FileCheck %s --check-prefix=CHECK0
+// RUN: llvm-nm %t.o.lto.1.o | FileCheck %s --check-prefix=CHECK1
+
+// CHECK0-DAG: T caller_b
+// CHECK0-DAG: T {{promoted_internal[.][0-9a-f]+}}
+
+// CHECK1-DAG: T caller_a
+// CHECK1-DAG: U {{promoted_internal[.][0-9a-f]+}}
+
+static void promoted_internal(void) {}
+
+void caller_a(void) {
+    promoted_internal();
+}
+
+void caller_b(void) {
+    promoted_internal();
+}
\ No newline at end of file
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 2608e2eb54398..33182a96283c9 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -84,12 +84,12 @@ static cl::list<std::string>
                              "path matches this for -save-temps options"),
                     cl::CommaSeparated, cl::Hidden);
 
-static cl::opt<unsigned> ThinLTOSplitPartitions(
-    "thinlto-split-partitions", cl::Hidden, cl::init(0),
-    cl::desc("Control split to how many partitions in thinlto backend."));
+static cl::opt<unsigned> LTOSplitPartitions(
+    "lto-split-partitions", cl::Hidden, cl::init(0),
+    cl::desc("Control split to how many partitions in lto backend."));
 
-static cl::opt<bool> ThinLTOSplit("thinlto-split", cl::init(false),
-                          cl::desc("Enable split module in thinlto backend."));
+static cl::opt<bool> LTOSplitByCG("lto-split-by-callgraph", cl::init(false),
+                          cl::desc("Enable split module in lto backend."));
 
 namespace llvm {
 extern cl::opt<bool> NoPGOWarnMismatch;
@@ -146,7 +146,7 @@ Error Config::addSaveTemps(std::string OutputFileName, bool 
UseInputModulePath,
       // named from the provided OutputFileName with the Task ID appended.
       if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
         PathPrefix = OutputFileName;
-        if (ThinLTOSplit)
+        if (LTOSplitByCG)
           PathPrefix += extract_filename(M.getSourceFileName()) + ".";
         if (Task != (unsigned)-1)
           PathPrefix += utostr(Task) + ".";
@@ -538,7 +538,8 @@ static bool splitOptAndCodeGenThin(unsigned task, const 
Config &C,
                                    const ModuleSummaryIndex &CombinedIndex,
                                    const std::vector<uint8_t> &CmdArgs,
                                    bool DoOpt, AddStreamFn IRAddStream,
-                                   ArrayRef<StringRef> &BitcodeLibFuncs) {
+                                   ArrayRef<StringRef> &BitcodeLibFuncs,
+                                   bool IsThinLTO = true) {
   const Target *T = &TM->getTarget();
 
   SplitModuleCG SplitModuleCG(Mod, CombinedIndex, 
ParallelCodeGenParallelismLevel);
@@ -563,14 +564,16 @@ static bool splitOptAndCodeGenThin(unsigned task, const 
Config &C,
         cgdata::saveModuleForTwoRounds(*MPart, PartitionId,
                                        IRAddStream);
     }
-    
-    // Rename the GlobalValues whose internal is changed to external. That's
-    // can avoid duplicate symbols.
-    auto PromotedRenames = SplitModuleCG.getPromotedRenames();
-    for (auto &GV : MPart->global_values()) {
-      if (auto It = PromotedRenames.find(GV.getName());
-          It != PromotedRenames.end()) {
-        GV.setName(It->second);
+
+    if (IsThinLTO) {
+      // Rename the GlobalValues whose linkage was changed from internal to
+      // external. This avoids duplicate symbols in ThinLTO.
+      auto PromotedRenames = SplitModuleCG.getPromotedRenames();
+      for (auto &GV : MPart->global_values()) {
+        if (auto It = PromotedRenames.find(GV.getName());
+            It != PromotedRenames.end()) {
+          GV.setName(It->second);
+        }
       }
     }
 
@@ -690,6 +693,11 @@ Error lto::backend(const Config &C, AddStreamFn AddStream,
 
   if (ParallelCodeGenParallelismLevel == 1) {
     codegen(C, TM.get(), AddStream, 0, Mod, CombinedIndex);
+  } else if (LTOSplitByCG) {
+    splitOptAndCodeGenThin(/*Task*/0, C, TM.get(), AddStream,
+                           ParallelCodeGenParallelismLevel, Mod, CombinedIndex,
+                           /*CmdArgs*/ std::vector<uint8_t>(), /*DoOpt*/false,
+                            AddStreamFn(), BitcodeLibFuncs, false);
   } else {
     splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod,
                  CombinedIndex);
@@ -755,9 +763,9 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, 
AddStreamFn AddStream,
 
   LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
   if (CodeGenOnly) {
-    if (ThinLTOSplit)
+    if (LTOSplitByCG)
       splitOptAndCodeGenThin(Task, Conf, TM.get(), AddStream,
-                             ThinLTOSplitPartitions, Mod, CombinedIndex,
+                             LTOSplitPartitions, Mod, CombinedIndex,
                              CmdArgs, false, IRAddStream, BitcodeLibFuncs);
     else
       // If CodeGenOnly is set, we only perform code generation and skip
@@ -772,9 +780,9 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, 
AddStreamFn AddStream,
   auto OptimizeAndCodegen =
       [&](Module &Mod, TargetMachine *TM,
           LLVMRemarkFileHandle DiagnosticOutputFile) {
-        if (ThinLTOSplit) {
+        if (LTOSplitByCG) {
           if (!splitOptAndCodeGenThin(
-                  Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod,
+                  Task, Conf, TM, AddStream, LTOSplitPartitions, Mod,
                   CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs))
             return 
finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
         } else {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to