https://github.com/mmjjpp updated https://github.com/llvm/llvm-project/pull/198702
>From d9fc0bd6f1d5767f87eb515a665e76f6d00d1ea4 Mon Sep 17 00:00:00 2001 From: maojiaping <[email protected]> Date: Wed, 20 May 2026 11:22:30 +0800 Subject: [PATCH 1/6] [ThinLTO][Split] Split module for parallel compilation in backend An interface for splitting a module by callgraph is added. This interface is called in the thinlto backend phase. The module is split into N Mparts, and opt and codegen are performed on the Mparts in parallel to implement parallel compilation in the thinlto backend. --- .../llvm/Transforms/Utils/SplitModuleCG.h | 34 ++ llvm/lib/LTO/LTOBackend.cpp | 292 +++++++++++++++++- llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + llvm/lib/Transforms/Utils/SplitModuleCG.cpp | 26 ++ 4 files changed, 336 insertions(+), 17 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Utils/SplitModuleCG.h create mode 100644 llvm/lib/Transforms/Utils/SplitModuleCG.cpp diff --git a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h new file mode 100644 index 0000000000000..e60c4e931d40c --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h @@ -0,0 +1,34 @@ +#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H +#define LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/LTO/Config.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" + +namespace llvm { +/// Splits the module M into N linkable partitions. The function ModuleCallback +/// is called N times passing each individual partition as the MPart argument. 
+class SplitModuleCG { +public: + using ModuleCreationCallback = + function_ref<void(std::unique_ptr<Module> MPart, unsigned PartitionId)>; + SplitModuleCG(Module &M, + const ModuleSummaryIndex &CombinedIndex, + unsigned LimitPartition = 0); + void SplitModule(ModuleCreationCallback ModuleCallback, + const llvm::lto::Config &C); + + unsigned getPartitionNum() { return N; } + + private: + unsigned N; + Module &M; + CallGraph CG; + DenseSet<const Function *> EntryFuncs; +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 73697a9d0d446..11200ade0e8c0 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -34,8 +34,10 @@ #include "llvm/Plugins/PassPlugin.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/VirtualFileSystem.h" @@ -45,6 +47,8 @@ #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/Transforms/Utils/SplitModuleCG.h" +#include <filesystem> #include <optional> using namespace llvm; @@ -80,6 +84,23 @@ static cl::list<std::string> "path matches this for -save-temps options"), cl::CommaSeparated, cl::Hidden); +static cl::opt<unsigned> ThinLTOSplitModuleSizeThreshold( + "thinlto-split-module-size-threshold", cl::Hidden, cl::init(500), + cl::desc("Control the amount of whether split in thinlto backend" + "accroding to the size of a module.")); + +static cl::opt<float> ThinLTOSplitModuleSizeRateThreshold( + "thinlto-split-module-size-rate-threshold", cl::Hidden, cl::init(0.5), + cl::desc("Whether to split in thinlto backend based on the ratio of " + 
"(callgraph size)/(module size)")); + +static cl::opt<unsigned> ThinLTOSplitPartitions( + "thinlto-split-partitions", cl::Hidden, cl::init(0), + cl::desc("Control split to how many partitions in thinlto backend.")); + +static cl::opt<bool> ThinLTOSplit("thinlto-split", cl::init(false), + cl::desc("Enable split module in thinlto backend.")); + namespace llvm { extern cl::opt<bool> NoPGOWarnMismatch; } @@ -124,12 +145,19 @@ Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath, if (LinkerHook && !LinkerHook(Task, M)) return false; + auto extract_filename = [](const std::string &path) -> std::string { + std::filesystem::path fs_path(path); + return fs_path.filename().string(); + }; + std::string PathPrefix; // If this is the combined module (not a ThinLTO backend compile) or the // user hasn't requested using the input module's path, emit to a file // named from the provided OutputFileName with the Task ID appended. if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { PathPrefix = OutputFileName; + if (ThinLTOSplit) + PathPrefix += extract_filename(M.getSourceFileName()) + "."; if (Task != (unsigned)-1) PathPrefix += utostr(Task) + "."; } else @@ -513,6 +541,212 @@ static void codegen(const Config &Conf, TargetMachine *TM, report_fatal_error(std::move(Err)); } +static unsigned calFunctionSize(const llvm::Function &F) { + unsigned size = 0; + for (const auto &BB : F) + size += std::distance(BB.begin(), BB.end()); + return size; +} + +static unsigned calModuleSize(const llvm::Module &M) { + unsigned size = 0; + for (const auto &F : M) + size += calFunctionSize(F); + return size; +} + +static bool canDoSplitModule(const llvm::Module &M) { + if (calModuleSize(M) < ThinLTOSplitModuleSizeThreshold) + return false; + return true; +} + +static bool HasLargeCG(Module &Mod, const ModuleSummaryIndex &CombinedIndex) { + // TODO: Check whether there has large callgraphs. 
When multiple callgraphs + // are split, thinlto parallel compilation can bring benefits. + return true; +} + +struct TaskIdAllocator { + using TaskId = unsigned; + + // Use the most significant bit (MSB) as a namespace tag. + // - Original ThinLTO backend tasks are expected to have MSB == 0. + // - Split partitions allocated by this allocator always have MSB == 1. + // This guarantees the two ID spaces never overlap. + static constexpr TaskId tag() { + return TaskId{1} << (std::numeric_limits<TaskId>::digits - 1); + } + + // Monotonic sequence counter for split partitions (MSB must remain 0 here). + std::atomic<TaskId> seq{0}; + + // Allocate a globally unique TaskId for a split partition. + // The returned ID is `tag() | seq`, so it lives in the MSB==1 namespace. + TaskId alloc() { + TaskId v = seq.fetch_add(1, std::memory_order_relaxed); + + // If the counter ever reaches the MSB, we'd overlap namespaces. + // This indicates an overflow / too many partitions. + if (v & tag()) + report_fatal_error("Partition TaskId overflow: seq reached the tag bit."); + + return tag() | v; + } + + // Helper for sanity checks / debugging. + static bool isPartition(TaskId id) { return (id & tag()) != 0; } +}; + +// Global allocator shared by all split partitions. 
+static TaskIdAllocator gSplitTaskIds; + +static bool splitOptAndCodeGenThin(unsigned task, const Config &C, + TargetMachine *TM, AddStreamFn AddStream, + unsigned ParallelCodeGenParallelismLevel, + Module &Mod, + const ModuleSummaryIndex &CombinedIndex, + const std::vector<uint8_t> &CmdArgs, + bool DoOpt, AddStreamFn IRAddStream, + ArrayRef<StringRef> &BitcodeLibFuncs) { + unsigned ThreadCount = 0; + const Target *T = &TM->getTarget(); + + static std::mutex PrintMutex; + + SplitModuleCG SplitModuleCG(Mod, CombinedIndex, ParallelCodeGenParallelismLevel); + ParallelCodeGenParallelismLevel = SplitModuleCG.getPartitionNum(); + + std::vector<std::string> TempObjectFiles(ParallelCodeGenParallelismLevel); + std::vector<llvm::FileRemover> TempFileRemovers(ParallelCodeGenParallelismLevel); + + const auto HandleModulePartition = [&](std::unique_ptr<Module> MPart, + unsigned PartitionId) { + unsigned CurrentThreadId, UniqueTaskId; + { + std::lock_guard<std::mutex> Lock(PrintMutex); + CurrentThreadId = ThreadCount++; + + // In distributed ThinLTO, `task` may be a sentinel (e.g. -1 cast to + // unsigned), which becomes UINT_MAX and naturally has MSB==1. Treat it + // as "no base task id" and don't enforce the namespace check on it. + // + // We do not rely on the incoming `task` for partition uniqueness: split + // partitions get a dedicated UniqueTaskId allocated below. 
+ if (task != std::numeric_limits<unsigned>::max()) { + assert(!TaskIdAllocator::isPartition(task) && + "Original ThinLTO TaskId unexpectedly overlaps the partition " + "namespace"); + } + UniqueTaskId = gSplitTaskIds.alloc(); + } + + std::unique_ptr<TargetMachine> ThreadTM = createTargetMachine(C, T, *MPart); + + if (DoOpt) { + if (!opt(C, ThreadTM.get(), UniqueTaskId, *MPart, /*IsThinLTO=*/true, + /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, + CmdArgs, BitcodeLibFuncs)) { + report_fatal_error("Failed to gen opt for split mod in thread."); + } + + // Save the current module before the first codegen round. + // Note that the second codegen round runs only `codegen()` without + // running `opt()`. We're not reaching here as it's bailed out earlier + // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`. + if (IRAddStream) + cgdata::saveModuleForTwoRounds(*MPart, task + CurrentThreadId, + IRAddStream); + } + + auto splitStream = [&](unsigned task, const Twine &moduleName) + -> Expected<std::unique_ptr<CachedFileStream>> { + int FD; + SmallString<128> TempFilename; + if (std::error_code EC = sys::fs::createTemporaryFile( + "thinlto-split", "o", FD, TempFilename)) + return errorCodeToError(EC); + + TempObjectFiles[PartitionId] = std::string(TempFilename.str()); + TempFileRemovers[PartitionId].setFile(TempObjectFiles[PartitionId]); + + auto OS = std::make_unique<raw_fd_ostream>( + FD, true, /*CloseOnDestruct*/true); + + auto Stream = std::make_unique<CachedFileStream>( + std::move(OS), std::string(TempFilename.str())); + + return std::move(Stream); + }; + + codegen(C, ThreadTM.get(), splitStream, UniqueTaskId, *MPart, + CombinedIndex); + }; + + SplitModuleCG.SplitModule(HandleModulePartition, C); + + // Use ld.lld (or ld) to combine the partitions into an object.
+ if (TempObjectFiles.empty()) { + llvm::errs() << "TempObjectFiles.empty()\n"; + return true; + } + + auto FinalStream = AddStream(task, Mod.getModuleIdentifier()); + if (!FinalStream) + report_fatal_error("Failed to open final output stream"); + + int MergedFD; + SmallString<128> MergedFilename; + if (sys::fs::createTemporaryFile("thinlto-merged", "o", MergedFD, + MergedFilename)) + report_fatal_error("Failed to create merged temp file."); + llvm::FileRemover MergedFileRemover(MergedFilename); + sys::fs::closeFile(MergedFD); + + std::vector<StringRef> Args; + std::string LinkerPath = ""; + if (auto Path = sys::findProgramByName("ld.lld")) + LinkerPath = *Path; + else if (auto Path = sys::findProgramByName("ld")) + LinkerPath = *Path; + + if (LinkerPath.empty()) + report_fatal_error("Cannot find linkeer (ld or ld.lld) to merge partitions."); + + Args.push_back(LinkerPath); + Args.push_back("-r"); + Args.push_back("-o"); + Args.push_back(MergedFilename); + + for (const auto &File : TempObjectFiles) + Args.push_back(File); + + std::string ErrMsg; + int Result = sys::ExecuteAndWait(LinkerPath, Args, /*Env=*/std::nullopt, + /*Redirects=*/{}, /*SecondsToWait=*/0, + /*MemoryLimit=*/0, &ErrMsg); + + if (Result != 0) { + errs() << "Linker failed: " << ErrMsg << "\n"; + report_fatal_error("Failed to merge split objects."); + } + + { + std::unique_ptr<CachedFileStream> &FinalFileStream = *FinalStream; + auto BufferOrErr = MemoryBuffer::getFile(MergedFilename); + if (!BufferOrErr) + report_fatal_error("Failed to read merged object."); + + FinalFileStream->OS->write(BufferOrErr.get()->getBufferStart(), + BufferOrErr.get()->getBufferSize()); + if (Error Err = FinalFileStream->commit()) { + report_fatal_error(Twine("Failed to commit final file stream: ") + + toString(std::move(Err))); + } + } + return true; +} + static void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream, unsigned ParallelCodeGenParallelismLevel, Module &Mod, @@ -677,11 +911,28 @@ Error 
lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, // the module, if applicable. Mod.setPartialSampleProfileRatio(CombinedIndex); + bool ProfitableToSplit = true; + if (ThinLTOSplit) { + if (!canDoSplitModule(Mod) || !HasLargeCG(Mod, CombinedIndex)) { + ProfitableToSplit = false; + LLVM_DEBUG(dbgs() << "warning: thinlto split not enable for module: " + << Mod.getName()); + } else { + LLVM_DEBUG(dbgs() << "thinlto: split codegen for module: " + << Mod.getName()); + } + } + LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); if (CodeGenOnly) { - // If CodeGenOnly is set, we only perform code generation and skip - // optimization. This value may differ from Conf.CodeGenOnly. - codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex); + if (ThinLTOSplit && ProfitableToSplit) + splitOptAndCodeGenThin(Task, Conf, TM.get(), AddStream, + ThinLTOSplitPartitions, Mod, CombinedIndex, + CmdArgs, false, IRAddStream, BitcodeLibFuncs); + else + // If CodeGenOnly is set, we only perform code generation and skip + // optimization. This value may differ from Conf.CodeGenOnly. + codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex); return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); } @@ -691,20 +942,27 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, auto OptimizeAndCodegen = [&](Module &Mod, TargetMachine *TM, LLVMRemarkFileHandle DiagnosticOutputFile) { - // Perform optimization and code generation for ThinLTO. - if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true, - /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, - CmdArgs, BitcodeLibFuncs)) - return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); - - // Save the current module before the first codegen round. - // Note that the second codegen round runs only `codegen()` without - // running `opt()`. We're not reaching here as it's bailed out earlier - // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`. 
- if (IRAddStream) - cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream); - - codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex); + if (ThinLTOSplit && ProfitableToSplit) { + if (!splitOptAndCodeGenThin( + Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod, + CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + } else { + // Perform optimization and code generation for ThinLTO. + if (!opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true, + /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, + CmdArgs, BitcodeLibFuncs)) + return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + + // Save the current module before the first codegen round. + // Note that the second codegen round runs only `codegen()` without + // running `opt()`. We're not reaching here as it's bailed out earlier + // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`. + if (IRAddStream) + cgdata::saveModuleForTwoRounds(Mod, Task, IRAddStream); + + codegen(Conf, TM, AddStream, Task, Mod, CombinedIndex); + } return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); }; diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index 8fe0476ab1a32..01b44ae2cfa29 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -89,6 +89,7 @@ add_llvm_component_library(LLVMTransformUtils SizeOpts.cpp SplitModule.cpp SplitModuleByCategory.cpp + SplitModuleCG.cpp StripNonLineTableDebugInfo.cpp SymbolRewriter.cpp UnifyFunctionExitNodes.cpp diff --git a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp new file mode 100644 index 0000000000000..9f57cb3ed566e --- /dev/null +++ b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp @@ -0,0 +1,26 @@ +#include "llvm/Transforms/Utils/SplitModuleCG.h" + +using namespace llvm; + +#define DEBUG_TYPE "split-module-CG" + +void 
SplitModuleCG::SplitModule(ModuleCreationCallback ModuleCallback, + const llvm::lto::Config &C) { + // TODO: 1. Process the linkage of the GlobalValue; 2. Allocate the callgraph + // to N partitions; 3.Invoke the cloneModule API to copy the N partitions to + // obtain MParts. + +} + +SplitModuleCG::SplitModuleCG(Module &M, + const ModuleSummaryIndex &CombinedIndex, + unsigned LimitPartition) + : M(M), CG(M), N(LimitPartition) { + // TODO: The module is split based on the callgraph, and EntryFuncs stores + // the root function of each callgraph. + + if (N == 0 || N > EntryFuncs.size()) { + N = EntryFuncs.size(); + } + N = N == 0 ? 1 : N; +} >From b80904856475a12f4c6a010c1730324406d7b595 Mon Sep 17 00:00:00 2001 From: maojiaping <[email protected]> Date: Wed, 20 May 2026 15:27:29 +0800 Subject: [PATCH 2/6] [ThinLTO][Split] Add callgraph-based module splitting (SplitModuleCG) Add a new SplitModuleCG that partitions a module into multiple parts using function callgraph traversal and cost-based load balancing. This is intended for use in ThinLTO to parallelize code generation by splitting the module while preserving function call dependencies.
Key features: - Build a simplified callgraph to track function calls and roots - Calculate function costs based on IR instruction count - Partition functions with balanced cost distribution - Externalize local symbols and rename promoted symbols to avoid conflicts - Clone module partitions and emit them in parallel --- .../llvm/Transforms/Utils/SplitModuleCG.h | 182 ++++++++- llvm/lib/LTO/LTOBackend.cpp | 10 + llvm/lib/Transforms/Utils/SplitModuleCG.cpp | 367 +++++++++++++++++- 3 files changed, 552 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h index e60c4e931d40c..956a1ea8030fe 100644 --- a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h +++ b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h @@ -1,6 +1,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H #define LLVM_TRANSFORMS_UTILS_SPLITMODULECG_H +#include "llvm/ADT/StringSet.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/LTO/Config.h" @@ -8,6 +9,169 @@ #include "llvm/ADT/DenseSet.h" namespace llvm { + +class SimplifyCallGraph; +class SimplifyCallGraphNode; + +using CostType = InstructionCost::CostType; + +class SimplifyCallGraph { + using FunctionMapTy = + std::map<const Function *, std::unique_ptr<SimplifyCallGraphNode>>; + + /// A map from \c Function* to \c SimplifyCallGraphNode*. + FunctionMapTy FunctionMap; + +public: + explicit SimplifyCallGraph(CallGraph &CG, + const ModuleSummaryIndex &CombinedIndex, + Module &M) + : CG(CG), M(M) { + createSimplifyCallGraph(CombinedIndex); + } + ~SimplifyCallGraph() {}; + + using iterator = FunctionMapTy::iterator; + using const_iterator = FunctionMapTy::const_iterator; + + /// Returns the module the call graph corresponds to. 
+ inline iterator begin() { return FunctionMap.begin(); } + inline iterator end() { return FunctionMap.end(); } + inline const_iterator begin() const { return FunctionMap.begin(); } + inline const_iterator end() const { return FunctionMap.end(); } + + /// Returns the call graph node for the provided function. + inline const SimplifyCallGraphNode *operator[](const Function *F) const { + const_iterator I = FunctionMap.find(F); + assert(I != FunctionMap.end() && "Function not in callgraph!"); + return I->second.get(); + } + + /// Returns the call graph node for the provided function. + inline SimplifyCallGraphNode *operator[](const Function *F) { + const_iterator I = FunctionMap.find(F); + assert(I != FunctionMap.end() && "Function not in callgraph!"); + return I->second.get(); + } + + /// Returns the call graph node for the provided function. + inline const SimplifyCallGraphNode *at(const Function *F) const { + const_iterator I = FunctionMap.find(F); + assert(I != FunctionMap.end() && "Function not in callgraph!"); + return I->second.get(); + } + + /// Returns the call graph node for the provided function. 
+ inline SimplifyCallGraphNode *at(const Function *F) { + const_iterator I = FunctionMap.find(F); + assert(I != FunctionMap.end() && "Function not in callgraph!"); + return I->second.get(); + } + + void createSimplifyCallGraph(const ModuleSummaryIndex &CombinedIndex); + void print(); + SimplifyCallGraphNode *getOrInsertFunction(const Function *F); + +private: + CallGraph &CG; + Module &M; +}; + +class SimplifyCallGraphNode { +public: + using CalledFunctionsSet = DenseSet<SimplifyCallGraphNode *>; + inline SimplifyCallGraphNode(SimplifyCallGraph *SCG, Function *F) + : SCG(SCG), F(F) {} + + SimplifyCallGraphNode(const SimplifyCallGraphNode &) = delete; + SimplifyCallGraphNode &operator=(const SimplifyCallGraphNode &) = delete; + + ~SimplifyCallGraphNode() {} + + Function *getFunction() const { return F; } + + unsigned getNumReferences() const { return NumReferences; } + + using iterator = DenseSet<SimplifyCallGraphNode *>::iterator; + using const_iterator = DenseSet<SimplifyCallGraphNode *>::const_iterator; + + inline iterator begin() { return CalledFunctions.begin(); } + inline iterator end() { return CalledFunctions.end(); } + inline const_iterator begin() const { return CalledFunctions.begin(); } + inline const_iterator end() const { return CalledFunctions.end(); } + inline size_t count(SimplifyCallGraphNode * SCGNode) { return CalledFunctions.count(SCGNode); } + inline bool empty() const { return CalledFunctions.empty(); } + inline unsigned size() const { return (unsigned)CalledFunctions.size(); } + + void addCalledFunction(SimplifyCallGraphNode *Called) { + auto [It, Inserted] = CalledFunctions.insert(Called); + if (Inserted) + Called->AddRef(); + } + + void removeCalledFunction(SimplifyCallGraphNode *Called) { + auto NumRemoved = CalledFunctions.erase(Called); + if (NumRemoved > 0) + Called->DropRef(); + } + +private: + friend class SimplifyCallGraph; + + SimplifyCallGraph *SCG; + Function *F; + + DenseSet<SimplifyCallGraphNode *> CalledFunctions; + unsigned 
NumReferences = 0; + + void DropRef() { --NumReferences; } + void AddRef() { ++NumReferences; } +}; + +static void addAllDependencies(SimplifyCallGraph &SCG, const Function &F, + DenseSet<const Function *> &Fns) { + assert(!F.isDeclaration()); + SmallVector<const Function *> WorkList({&F}); + + while (!WorkList.empty()) { + const auto &CurFn = *WorkList.pop_back_val(); + assert(!CurFn.isDeclaration()); + + // Follow the callees recorded for CurFn in the simplified call graph. + // Declarations are skipped; each defined callee is queued once so its + // own callees are collected too. No indirect-call scan happens here. + for (auto &SCGNode : *SCG.at(&CurFn)) { + auto *Callee = SCGNode->getFunction(); + if (!Callee || Callee->isDeclaration()) + continue; + if (Callee != &F) + { + auto [It, Inserted] = Fns.insert(Callee); + if (Inserted) + WorkList.push_back(Callee); + } + } + } +} + +struct FunctionWithDependencies { + FunctionWithDependencies(SimplifyCallGraph &SCG, + const DenseMap<const Function *, CostType> &FnCosts, + const Function *F) + : F(F) { + addAllDependencies(SCG, *F, Dependencies); + + TotalCost = FnCosts.at(F); + for (const auto *Dep : Dependencies) { + TotalCost += FnCosts.lookup(Dep); + } + } + + const Function *F = nullptr; + DenseSet<const Function *> Dependencies; + CostType TotalCost = 0; +}; + /// Splits the module M into N linkable partitions. The function ModuleCallback /// is called N times passing each individual partition as the MPart argument.
class SplitModuleCG { @@ -21,12 +185,28 @@ class SplitModuleCG { const llvm::lto::Config &C); unsigned getPartitionNum() { return N; } + StringSet<> &getOriginalExternals() { return OriginalExternals; } + StringMap<std::string> &getPromotedRenames() { return PromotedRenames; } - private: +private: unsigned N; Module &M; CallGraph CG; + std::unique_ptr<SimplifyCallGraph> SCG; + CostType ModuleCost; DenseSet<const Function *> EntryFuncs; + StringSet<> OriginalExternals; + StringMap<std::string> PromotedRenames; + DenseMap<const Function *, bool> externalFunction; + DenseMap<const Function *, CostType> FuncsCosts; + SmallVector<FunctionWithDependencies> FWDWorkList; + + void calculateFunctionCosts(); + std::vector<DenseSet<const Function *>> doPartitioning(); + void dealWithMpart( + Module &MPart, unsigned I, + function_ref<bool(const GlobalValue *)> NeedsConservativeImport); + void createWorkList(); }; } // end namespace llvm diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 11200ade0e8c0..aa1213e5e6af1 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -658,6 +658,16 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C, cgdata::saveModuleForTwoRounds(*MPart, task + CurrentThreadId, IRAddStream); } + + // Rename GlobalValues whose linkage changed from internal to external. This
+ auto PromotedRenames = SplitModuleCG.getPromotedRenames(); + for (auto &GV : MPart->global_values()) { + if (auto It = PromotedRenames.find(GV.getName()); + It != PromotedRenames.end()) { + GV.setName(It->second); + } + } auto splitStream = [&](unsigned task, const Twine &moduleName) -> Expected<std::unique_ptr<CachedFileStream>> { diff --git a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp index 9f57cb3ed566e..debdddfb79041 100644 --- a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp +++ b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp @@ -1,26 +1,381 @@ #include "llvm/Transforms/Utils/SplitModuleCG.h" - +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/MD5.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <thread> using namespace llvm; #define DEBUG_TYPE "split-module-CG" +namespace { + +static cl::opt<bool> enablePrintSimplifyCallGraph( + "enable-print-simplify-callgraph", cl::Hidden, cl::init(false), + cl::desc("print SimplifyCallGraph")); + +using PartitionID = unsigned; + +static void externalize(GlobalValue *GV) { + if (GV->hasLocalLinkage()) { + GV->setLinkage(GlobalValue::ExternalLinkage); + GV->setVisibility(GlobalValue::HiddenVisibility); + } + + // Unnamed entities must be named consistently between modules. setName will + // give a distinct name to each such entity. + if (!GV->hasName()) + GV->setName("__llvmsplit_unnamed"); +} + +} // namespace + +std::vector<DenseSet<const Function *>> SplitModuleCG::doPartitioning() { + LLVM_DEBUG(dbgs() << "\n--Partitioning Starts--\n"); + // Performs all of the partitioning work on M. 
+ std::vector<DenseSet<const Function *>> Partitions; + Partitions.resize(N); + if (N == 0) + return Partitions; + + auto ComparePartitions = [](const std::pair<PartitionID, CostType> &a, + const std::pair<PartitionID, CostType> &b) { + // When two partitions have the same cost, assign to the one with the + // biggest ID first. This allows us to put things in P0 last, because P0 may + // have other stuff added later. + if (a.second == b.second) + return a.first < b.first; + return a.second > b.second; + }; + + std::vector<std::pair<PartitionID, CostType>> BalancingQueue; + for (unsigned I = 0; I < N; ++I) + BalancingQueue.emplace_back(I, 0); + + // Helper function to handle assigning a function to a partition. This takes + // care of updating the balancing queue. + const auto AssignToPartition = [&](PartitionID PID, + const FunctionWithDependencies &FWD) { + auto &FnsInPart = Partitions[PID]; + FnsInPart.insert(FWD.F); + for (const Function *Dep : FWD.Dependencies) { + FnsInPart.insert(Dep); + } + + // Update the balancing queue. we scan backwards because in the common case + // the partition is at the end. + for (auto &[QueuePID, Cost] : reverse(BalancingQueue)) { + if (QueuePID == PID) { + CostType NewCost = 0; + for (auto *Fn : Partitions[PID]) + NewCost += FuncsCosts.at(Fn); + Cost = NewCost; + } + } + + sort(BalancingQueue, ComparePartitions); + }; + + for (auto &CurFn : FWDWorkList) { + // Normal "load-balancing", assign to partition with least pressure. 
+ auto [PID, CurCost] = BalancingQueue.back(); + AssignToPartition(PID, CurFn); + } + + return Partitions; +} + +void SplitModuleCG::calculateFunctionCosts() { + ModuleCost = 0; + for (auto &Fn : M) { + if (Fn.isDeclaration()) + continue; + + CostType FnCost = 0; + for (const auto &BB : Fn) { + CostType CostVal = std::distance(BB.begin(), BB.end()); + FnCost += CostVal; + } + assert(FnCost != 0); + FuncsCosts[&Fn] = FnCost; + assert((ModuleCost + FnCost) >= ModuleCost && "Overflow!"); + ModuleCost += FnCost; + } +} + +void SplitModuleCG::dealWithMpart(Module &MPart, unsigned I, + function_ref<bool(const GlobalValue *)> NeedsConservativeImport) { + // collect symbols to rename + auto checkPromoted = [&](const GlobalValue &GV) { + // now is external (not local), but not in external set. + if (!GV.hasLocalLinkage() && !OriginalExternals.contains(GV.getName())) { + if (PromotedRenames.count(GV.getName())) + return; + MD5 Hash; + Hash.update(M.getModuleIdentifier()); + MD5::MD5Result Result; + Hash.final(Result); + SmallString<32> HashStr; + MD5::stringifyResult(Result, HashStr); + std::string NewName = (GV.getName() + "." + HashStr.str().substr(0, 8)).str(); + PromotedRenames[GV.getName()] = NewName; + } + }; + + auto AvailableExternalizeFunc = [&](llvm::Function &Func) { + Func.setLinkage(GlobalValue::AvailableExternallyLinkage); + Func.setComdat(nullptr); + }; + + for (const auto &GV : MPart.global_values()) + checkPromoted(GV); + // Clean-up conservatively imported GVs without any users. 
+ for (auto &GV : make_early_inc_range(MPart.globals())) { + if (NeedsConservativeImport(&GV) && GV.use_empty()) + GV.eraseFromParent(); + } + + for (auto &func : MPart.functions()) { + auto Fn = M.getFunction(func.getName()); + if (externalFunction.count(Fn) && !func.isDeclaration()) { + if (!externalFunction[Fn]) { + AvailableExternalizeFunc(func); + } else { + externalFunction[Fn] = false; + } + } + } + + LLVM_DEBUG(dbgs() << MPart.getModuleIdentifier() << " : \n"); + for (auto &F : MPart) { + if (!F.isDeclaration()) + LLVM_DEBUG(dbgs() << " [Function: ] " << I << " " << F.getName() << " " + << F.getLinkage() << "\n"); + } +} + +void SplitModuleCG::createWorkList() { + // First, find all the entry functions with an in-degree of 0 + // (i.e., those that are not called by any function). + for (auto &NodePair : *SCG) { + SimplifyCallGraphNode *SCGNode = NodePair.second.get(); + Function *F = SCGNode->getFunction(); + if (F && SCGNode->getNumReferences() == 0) { + EntryFuncs.insert(F); + } + } + + // Second, find all the dependencies of each entry function. + for (auto *F : EntryFuncs) { + FWDWorkList.emplace_back(*SCG, FuncsCosts, F); + } + + // Third, find all the functions that are not in the worklist. + DenseSet<const Function *> SeenFunctions; + for (const auto &FWD : FWDWorkList) { + SeenFunctions.insert(FWD.F); + SeenFunctions.insert(FWD.Dependencies.begin(), FWD.Dependencies.end()); + } + for (auto &F : M) { + // This function may be in a ring, and therefore is not a dependency of + // any root, which is treated as a root function here. + if (!F.isDeclaration() && !SeenFunctions.count(&F)) { + FWDWorkList.emplace_back(*SCG, FuncsCosts, &F); + auto &FWD = FWDWorkList.back(); + EntryFuncs.insert(&F); + SeenFunctions.insert(FWD.F); + SeenFunctions.insert(FWD.Dependencies.begin(), FWD.Dependencies.end()); + } + } + + // Sort the worklist so the most expensive roots are seen first. 
+ sort(FWDWorkList, [&](auto &A, auto &B) { + // Sort by total cost, and if the total cost is identical, sort + // alphabetically + if (A.TotalCost == B.TotalCost) + return A.F->getName() < B.F->getName(); + return A.TotalCost > B.TotalCost; + }); + + LLVM_DEBUG(dbgs() << "Number of callgraphs to be allocated: " + << FWDWorkList.size() << " Module cost: " + << ModuleCost << "\n"); + LLVM_DEBUG(dbgs() << "callgraphs: \n"); + for (auto FWD : FWDWorkList) { + LLVM_DEBUG(dbgs() << "[root] " << FWD.F->getName() << " (totalCost:" + << FWD.TotalCost << "; root function cost: " + << FuncsCosts[FWD.F] << "; has dependency: " + << FWD.Dependencies.size() << "\n"); + } +} + void SplitModuleCG::SplitModule(ModuleCreationCallback ModuleCallback, const llvm::lto::Config &C) { - // TODO: 1. Process the linkage of the GlobalValue; 2. Allocate the callgraph - // to N partitions; 3.Invoke the cloneModule API to copy the N partitions to - // obtain MParts. + for (Function &F : M) { + if (F.hasLocalLinkage() && F.hasOneUse() && !F.hasAddressTaken()) + continue; + externalize(&F); + if (!F.isDeclaration() && + (F.hasExternalLinkage() || !F.isDefinitionExact())) + externalFunction[&F] = true; + } + for (GlobalVariable &GV : M.globals()) + externalize(&GV); + for (GlobalAlias &GA : M.aliases()) + externalize(&GA); + for (GlobalIFunc &GI : M.ifuncs()) + externalize(&GI); + // TODO: Consider optimizing the alias, replacing the determined alias with + // the determined aliasee. + + // Assign callgraphs into N partitions. + auto Partitions = doPartitioning(); + assert(Partitions.size() == N); + + // local GVs need to be conservatively imported into [dependency] every module, + // and then cleaned up afterwards. + const auto NeedsConservativeImport = [&](const GlobalValue *GV) { + // We conservatively import private/internal GVs into every module and clean + // them up afterwards. 
+ const auto *Var = dyn_cast<GlobalVariable>(GV); + return Var && Var->hasLocalLinkage(); + }; + + auto ShouldCloneDefinition = [&](unsigned I, const GlobalValue *GV) { + const auto &FnsInPart = Partitions[I]; + + // Functions go in their assigned partition. + if (const auto *newFn = dyn_cast<Function>(GV)) { + const auto *Fn = M.getFunction(newFn->getName()); + return FnsInPart.contains(Fn); + } + if (NeedsConservativeImport(GV)) + return true; + // Everything else goes in the first partition. + return I == 0; + }; + + // TODO: In the future, it may be considered to also include clonemodule in + // parallel to reduce compilation time. + std::vector<std::thread> Threads; + Threads.reserve(N); + std::vector<std::unique_ptr<Module>> MPartInCtxs; + MPartInCtxs.resize(N); + for (unsigned I = 0; I < N; ++I) { + ValueToValueMapTy VMap; + std::unique_ptr<Module> MPart( + CloneModule(M, VMap, [&](const GlobalValue *GV) { + return ShouldCloneDefinition(I, GV); + })); + + dealWithMpart(*MPart, I, NeedsConservativeImport); + + // If not clone module in multi-thread, we also need to clone + // the module obtained through segmentation into a new context + // to avoid data races. + SmallString<0> BC; + raw_svector_ostream BCOS(BC); + WriteBitcodeToFile(*MPart, BCOS); + MPart.reset(); + Threads.emplace_back([&, I](SmallString<0> BC) { + llvm::lto::LTOLLVMContext Ctx(C); + Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( + MemoryBufferRef(BC.str(), "ld-temp.o"), Ctx); + BC = SmallString<0>(); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + ModuleCallback(std::move(MOrErr.get()), I); + }, std::move(BC)); + } + for (auto &T : Threads) + T.join(); } SplitModuleCG::SplitModuleCG(Module &M, const ModuleSummaryIndex &CombinedIndex, unsigned LimitPartition) : M(M), CG(M), N(LimitPartition) { - // TODO: The module is split based on the callgraph, and EntryFuncs stores - // the root function of each callgraph. + // Track existing non-local symbols. 
This ensures that when we promote + // internal symbols to external for partitioning, we can handle renaming + // and avoid conflicts. + for (const auto &GV : M.global_values()) + if (!GV.hasLocalLinkage()) + OriginalExternals.insert(GV.getName()); + + calculateFunctionCosts(); + + // Construct a simplified call graph to facilitate worklist generation. + SCG = std::make_unique<SimplifyCallGraph>(CG, CombinedIndex, M); + // TODO: When the SCG is established, the special cases of comdat and + // initarray need to be considered. + + // Populate the worklist with root functions and their transitive + // dependencies. This worklist serves as the foundation for the + // subsequent module partitioning. + createWorkList(); if (N == 0 || N > EntryFuncs.size()) { N = EntryFuncs.size(); } N = N == 0 ? 1 : N; } + +void SimplifyCallGraph::createSimplifyCallGraph( + const ModuleSummaryIndex &CombinedIndex) { + for (auto &NodePair : CG) { + CallGraphNode *CGNode = NodePair.second.get(); + Function *F = CGNode->getFunction(); + if (!F || F->isDeclaration()) + continue; + + SimplifyCallGraphNode *SCGNode = getOrInsertFunction(F); + + //TODO: Trace indirect call usage for the current function. + + for (const auto &CGNodeItem : *CGNode) { + Function *Called = CGNodeItem.second->getFunction(); + if (!Called) { + //TODO: Deal with indirect call. + // 1. Check if the instruction has a callees metadata. + // 2. Check if this is an indirect call with profile data. + // 3. Check if this is an alias to a function. 
+ } + if (!Called || Called->isDeclaration()) + continue; + SCGNode->addCalledFunction(getOrInsertFunction(Called)); + } + } + + if (enablePrintSimplifyCallGraph) + print(); +} + + +void SimplifyCallGraph::print() { + for (auto &SCGItem : FunctionMap) { + LLVM_DEBUG(dbgs() << "Call graph node for function: '" + << SCGItem.first->getName() << "' #uses=" + << SCGItem.second->getNumReferences() << "\n"); + + for (const auto &callee : *SCGItem.second) { + LLVM_DEBUG(dbgs() <<" Calls function : '" + << callee->getFunction()->getName() << " '\n"); + } + } +} + +SimplifyCallGraphNode * +SimplifyCallGraph::getOrInsertFunction(const Function *F) { + auto &SCGN = FunctionMap[F]; + if (SCGN) + return SCGN.get(); + + SCGN = + std::make_unique<SimplifyCallGraphNode>(this, const_cast<Function *>(F)); + return SCGN.get(); +} >From 88db8d4e7fbcadc73e1c48c23bb8781b2c21df4f Mon Sep 17 00:00:00 2001 From: maojiaping <[email protected]> Date: Wed, 20 May 2026 15:57:13 +0800 Subject: [PATCH 3/6] [llvm-split][SplitModuleCG] Add support for SplitModuleCG Add a new command line option --enable-split-module-CG to llvm-split tool for testing the SplitModuleCG utility. 
The change: - Adds --enable-split-module-CG flag - Wire up the SplitModuleCG interface in llvm-split --- .../SplitModuleCG/split-promoted-rename.ll | 41 +++++++++++++++++++ .../SplitModuleCG/function-with-ring.ll | 36 ++++++++++++++++ .../llvm-split/SplitModuleCG/function.ll | 35 ++++++++++++++++ .../llvm-split/SplitModuleCG/partition-cap.ll | 10 +++++ .../SplitModuleCG/single-partition.ll | 13 ++++++ .../tools/llvm-split/SplitModuleCG/unnamed.ll | 8 ++++ llvm/tools/llvm-split/llvm-split.cpp | 36 ++++++++++++++++ 7 files changed, 179 insertions(+) create mode 100644 llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/function-with-ring.ll create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/function.ll create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/partition-cap.ll create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/single-partition.ll create mode 100644 llvm/test/tools/llvm-split/SplitModuleCG/unnamed.ll diff --git a/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll b/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll new file mode 100644 index 0000000000000..6c51141a9ad85 --- /dev/null +++ b/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll @@ -0,0 +1,41 @@ +; Test that internal symbols promoted during module splitting are consistently +; renamed with an MD5 suffix across all partitions. 
+; +; RUN: opt -module-summary %s -o %t.bc +; RUN: llvm-lto2 run %t.bc -o %t \ +; RUN: -thinlto-split=true \ +; RUN: -thinlto-split-partitions=2 -thinlto-split-module-size-threshold=0 \ +; RUN: -r=%t.bc,caller_a,px \ +; RUN: -r=%t.bc,caller_b,px +; RUN: llvm-nm %t.1 | FileCheck %s + +; CHECK-DAG: T caller_a +; CHECK-DAG: T caller_b +; CHECK: T {{.*promoted_internal[._][0-9a-f]+.*}} +; CHECK-NOT: T promoted_internal{{$}} + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; @promoted_internal is internal. SplitModuleCG::dealWithMpart's checkPromoted +; records it in PromotedRenames. splitOptAndCodeGenThin applies the rename +; after opt via: +; for (auto &GV : MPart->global_values()) +; if (auto It = PromotedRenames.find(GV.getName()); ...) +; GV.setName(It->second); +define internal void @promoted_internal() { +entry: + ret void +} + +define void @caller_a() { +entry: + call void @promoted_internal() + ret void +} + +define void @caller_b() { +entry: + call void @promoted_internal() + ret void +} diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/function-with-ring.ll b/llvm/test/tools/llvm-split/SplitModuleCG/function-with-ring.ll new file mode 100644 index 0000000000000..f2fc8c03c922a --- /dev/null +++ b/llvm/test/tools/llvm-split/SplitModuleCG/function-with-ring.ll @@ -0,0 +1,36 @@ +; RUN: llvm-split -enable-split-module-CG=true -j2 -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0-DAG: declare void @foo() +; CHECK0-DAG: define void @bar() +; CHECK0-DAG: declare void @call_foo() +; CHECK0-DAG: define void @call_bar() + +; CHECK1-DAG: define void @foo() +; CHECK1-DAG: declare void @bar() +; CHECK1-DAG: define void @call_foo() +; CHECK1-DAG: declare void @call_bar() + +define void @foo() { +entry: + call void @call_foo() + ret void +} + +define void @bar() { +entry: + ret void 
+} + +define void @call_foo() { +entry: + call void @foo() + ret void +} + +define void @call_bar() { +entry: + call void @bar() + ret void +} diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/function.ll b/llvm/test/tools/llvm-split/SplitModuleCG/function.ll new file mode 100644 index 0000000000000..ddf5bb5c3dff3 --- /dev/null +++ b/llvm/test/tools/llvm-split/SplitModuleCG/function.ll @@ -0,0 +1,35 @@ +; RUN: llvm-split -enable-split-module-CG=true -j2 -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0-DAG: declare dso_local void @foo() +; CHECK0-DAG: define void @bar() +; CHECK0-DAG: declare void @func_a() +; CHECK0-DAG: define void @func_b() +; CHECK1-DAG: define internal void @foo() +; CHECK1-DAG: define available_externally void @bar() +; CHECK1-DAG: define void @func_a() +; CHECK1-DAG: declare void @func_b() + +define internal void @foo() { +entry: + ret void +} + +define void @bar() { +entry: + ret void +} + +define void @func_a() { +entry: + call void @foo() + call void @bar() + ret void +} + +define void @func_b() { +entry: + call void @bar() + ret void +} diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/partition-cap.ll b/llvm/test/tools/llvm-split/SplitModuleCG/partition-cap.ll new file mode 100644 index 0000000000000..5c3ced3e682af --- /dev/null +++ b/llvm/test/tools/llvm-split/SplitModuleCG/partition-cap.ll @@ -0,0 +1,10 @@ +; RUN: llvm-split -enable-split-module-CG=true -j10 -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; should only produce 2 output files (N capped to EntryFuncs.size()=2) + +; CHECK0: define void @foo() +; CHECK1: define void @bar() + +define void @foo() { ret void } +define void @bar() { ret void } diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/single-partition.ll b/llvm/test/tools/llvm-split/SplitModuleCG/single-partition.ll new file 
mode 100644 index 0000000000000..fdfdf910a3498 --- /dev/null +++ b/llvm/test/tools/llvm-split/SplitModuleCG/single-partition.ll @@ -0,0 +1,13 @@ +; RUN: llvm-split -enable-split-module-CG=true -j1 -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s + +; CHECK0: define void @foo() +; CHECK0: define void @bar() + +define void @foo() { + call void @bar() + ret void +} +define void @bar() { + ret void +} diff --git a/llvm/test/tools/llvm-split/SplitModuleCG/unnamed.ll b/llvm/test/tools/llvm-split/SplitModuleCG/unnamed.ll new file mode 100644 index 0000000000000..73f7079669c55 --- /dev/null +++ b/llvm/test/tools/llvm-split/SplitModuleCG/unnamed.ll @@ -0,0 +1,8 @@ +; RUN: llvm-split -enable-split-module-CG=true -j2 -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s + +; CHECK0-DAG: define hidden void @__llvmsplit_unnamed() + +define internal void @0() { + ret void +} \ No newline at end of file diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp index 4cc4fd945fc53..4156222855617 100644 --- a/llvm/tools/llvm-split/llvm-split.cpp +++ b/llvm/tools/llvm-split/llvm-split.cpp @@ -18,8 +18,10 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" +#include "llvm/LTO/Config.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -35,6 +37,7 @@ #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/Utils/SplitModule.h" #include "llvm/Transforms/Utils/SplitModuleByCategory.h" +#include "llvm/Transforms/Utils/SplitModuleCG.h" using namespace llvm; @@ -76,6 +79,10 @@ static cl::opt<std::string> static cl::opt<std::string> MCPU("mcpu", cl::desc("Target CPU, ignored if --mtriple is not used"), cl::value_desc("cpu"), cl::cat(SplitCategory)); + +static cl::opt<bool> + 
EnableSplitModuleCG("enable-split-module-CG", cl::Prefix, cl::init(false), + cl::desc("Split module using call graph"), cl::cat(SplitCategory)); enum class SplitByCategoryType { SBCT_ByAttribute, @@ -327,6 +334,35 @@ int main(int argc, char **argv) { "splitModule implementation\n"; } + if (EnableSplitModuleCG) { + const auto HandleModulePartCG = [&](std::unique_ptr<Module> MPart, unsigned I) { + std::error_code EC; + std::unique_ptr<ToolOutputFile> Out( + new ToolOutputFile(OutputFilename + utostr(I), EC, sys::fs::OF_None)); + if (EC) { + errs() << EC.message() << '\n'; + exit(1); + } + + if (verifyModule(*MPart, &errs())) { + errs() << "Broken module!\n"; + exit(1); + } + + WriteBitcodeToFile(*MPart, Out->os()); + + // Declare success. + Out->keep(); + }; + + llvm::lto::Config Config; + ModuleSummaryIndex CombinedIndex(false); + SplitModuleCG SplitModuleCG(*M, CombinedIndex, NumOutputs); + SplitModuleCG.SplitModule(HandleModulePartCG, Config); + return 0; + } + SplitModule(*M, NumOutputs, HandleModulePart, PreserveLocals, RoundRobin); return 0; } + >From 073f4c1ce6305c1867ca607db45c66843e392f63 Mon Sep 17 00:00:00 2001 From: maojiaping <[email protected]> Date: Fri, 12 Jun 2026 15:17:07 +0800 Subject: [PATCH 4/6] [SplitModuleCG] Fix warning errors - Remove unused variable. - Fix constructor initialization order to match class declaration order (N, M, CG). 
--- llvm/include/llvm/Transforms/Utils/SplitModuleCG.h | 6 ++---- llvm/lib/LTO/LTOBackend.cpp | 1 - llvm/lib/Transforms/Utils/SplitModuleCG.cpp | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h index 956a1ea8030fe..9836376b94a82 100644 --- a/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h +++ b/llvm/include/llvm/Transforms/Utils/SplitModuleCG.h @@ -26,7 +26,7 @@ class SimplifyCallGraph { explicit SimplifyCallGraph(CallGraph &CG, const ModuleSummaryIndex &CombinedIndex, Module &M) - : CG(CG), M(M) { + : CG(CG) { createSimplifyCallGraph(CombinedIndex); } ~SimplifyCallGraph() {}; @@ -74,14 +74,13 @@ class SimplifyCallGraph { private: CallGraph &CG; - Module &M; }; class SimplifyCallGraphNode { public: using CalledFunctionsSet = DenseSet<SimplifyCallGraphNode *>; inline SimplifyCallGraphNode(SimplifyCallGraph *SCG, Function *F) - : SCG(SCG), F(F) {} + : F(F) {} SimplifyCallGraphNode(const SimplifyCallGraphNode &) = delete; SimplifyCallGraphNode &operator=(const SimplifyCallGraphNode &) = delete; @@ -118,7 +117,6 @@ class SimplifyCallGraphNode { private: friend class SimplifyCallGraph; - SimplifyCallGraph *SCG; Function *F; DenseSet<SimplifyCallGraphNode *> CalledFunctions; diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index aa1213e5e6af1..cfe196a74e1b2 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -609,7 +609,6 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C, const std::vector<uint8_t> &CmdArgs, bool DoOpt, AddStreamFn IRAddStream, ArrayRef<StringRef> &BitcodeLibFuncs) { - unsigned ThreadCount = 0; const Target *T = &TM->getTarget(); static std::mutex PrintMutex; diff --git a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp index debdddfb79041..c50111204e1f0 100644 --- a/llvm/lib/Transforms/Utils/SplitModuleCG.cpp +++ 
b/llvm/lib/Transforms/Utils/SplitModuleCG.cpp @@ -299,7 +299,7 @@ void SplitModuleCG::SplitModule(ModuleCreationCallback ModuleCallback, SplitModuleCG::SplitModuleCG(Module &M, const ModuleSummaryIndex &CombinedIndex, unsigned LimitPartition) - : M(M), CG(M), N(LimitPartition) { + : N(LimitPartition), M(M), CG(M) { // Track existing non-local symbols. This ensures that when we promote // internal symbols to external for partitioning, we can handle renaming // and avoid conflicts. >From 76ed2093f94ceed85d596fb4371326c784bb7c3e Mon Sep 17 00:00:00 2001 From: maojiaping <[email protected]> Date: Mon, 22 Jun 2026 09:09:02 +0800 Subject: [PATCH 5/6] [ThinLTO][SplitModuleCG] Trim non-core code --- llvm/lib/LTO/LTOBackend.cpp | 199 +----------------- .../SplitModuleCG/split-promoted-rename.ll | 41 ---- 2 files changed, 10 insertions(+), 230 deletions(-) delete mode 100644 llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index cfe196a74e1b2..2608e2eb54398 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -84,16 +84,6 @@ static cl::list<std::string> "path matches this for -save-temps options"), cl::CommaSeparated, cl::Hidden); -static cl::opt<unsigned> ThinLTOSplitModuleSizeThreshold( - "thinlto-split-module-size-threshold", cl::Hidden, cl::init(500), - cl::desc("Control the amount of whether split in thinlto backend" - "accroding to the size of a module.")); - -static cl::opt<float> ThinLTOSplitModuleSizeRateThreshold( - "thinlto-split-module-size-rate-threshold", cl::Hidden, cl::init(0.5), - cl::desc("Whether to split in thinlto backend based on the ratio of " - "(callgraph size)/(module size)")); - static cl::opt<unsigned> ThinLTOSplitPartitions( "thinlto-split-partitions", cl::Hidden, cl::init(0), cl::desc("Control split to how many partitions in thinlto backend.")); @@ -541,66 +531,6 @@ static void codegen(const Config &Conf, TargetMachine *TM, 
report_fatal_error(std::move(Err)); } -static unsigned calFunctionSize(const llvm::Function &F) { - unsigned size = 0; - for (const auto &BB : F) - size += std::distance(BB.begin(), BB.end()); - return size; -} - -static unsigned calModuleSize(const llvm::Module &M) { - unsigned size = 0; - for (const auto &F : M) - size += calFunctionSize(F); - return size; -} - -static bool canDoSplitModule(const llvm::Module &M) { - if (calModuleSize(M) < ThinLTOSplitModuleSizeThreshold) - return false; - return true; -} - -static bool HasLargeCG(Module &Mod, const ModuleSummaryIndex &CombinedIndex) { - // TODO: Check whether there has large callgraphs. When multiple callgraphs - // are split, thinlto parallel compilation can bring benefits. - return true; -} - -struct TaskIdAllocator { - using TaskId = unsigned; - - // Use the most significant bit (MSB) as a namespace tag. - // - Original ThinLTO backend tasks are expected to have MSB == 0. - // - Split partitions allocated by this allocator always have MSB == 1. - // This guarantees the two ID spaces never overlap. - static constexpr TaskId tag() { - return TaskId{1} << (std::numeric_limits<TaskId>::digits - 1); - } - - // Monotonic sequence counter for split partitions (MSB must remain 0 here). - std::atomic<TaskId> seq{0}; - - // Allocate a globally unique TaskId for a split partition. - // The returned ID is `tag() | seq`, so it lives in the MSB==1 namespace. - TaskId alloc() { - TaskId v = seq.fetch_add(1, std::memory_order_relaxed); - - // If the counter ever reaches the MSB, we'd overlap namespaces. - // This indicates an overflow / too many partitions. - if (v & tag()) - report_fatal_error("Partition TaskId overflow: seq reached the tag bit."); - - return tag() | v; - } - - // Helper for sanity checks / debugging. - static bool isPartition(TaskId id) { return (id & tag()) != 0; } -}; - -// Global allocator shared by all split partitions. 
-static TaskIdAllocator gSplitTaskIds; - static bool splitOptAndCodeGenThin(unsigned task, const Config &C, TargetMachine *TM, AddStreamFn AddStream, unsigned ParallelCodeGenParallelismLevel, @@ -611,39 +541,15 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C, ArrayRef<StringRef> &BitcodeLibFuncs) { const Target *T = &TM->getTarget(); - static std::mutex PrintMutex; - SplitModuleCG SplitModuleCG(Mod, CombinedIndex, ParallelCodeGenParallelismLevel); ParallelCodeGenParallelismLevel = SplitModuleCG.getPartitionNum(); - std::vector<std::string> TempObjectFiles(ParallelCodeGenParallelismLevel); - std::vector<llvm::FileRemover> TempFileRemovers(ParallelCodeGenParallelismLevel); - const auto HandleModulePartition = [&](std::unique_ptr<Module> MPart, unsigned PartitionId) { - unsigned CurrentThreadId, UniqueTaskId; - { - std::lock_guard<std::mutex> Lock(PrintMutex); - CurrentThreadId = ThreadCount++; - - // In distributed ThinLTO, `task` may be a sentinel (e.g. -1 cast to - // unsigned), which becomes UINT_MAX and naturally has MSB==1. Treat it - // as "no base task id" and don't enforce the namespace check on it. - // - // We do not rely on the incoming `task` for partition uniqueness: split - // partitions get a dedicated UniqueTaskId allocated below. 
- if (task != std::numeric_limits<unsigned>::max()) { - assert(!TaskIdAllocator::isPartition(task) && - "Original ThinLTO TaskId unexpectedly overlaps the partition " - "namespace"); - } - UniqueTaskId = gSplitTaskIds.alloc(); - } - std::unique_ptr<TargetMachine> ThreadTM = createTargetMachine(C, T, *MPart); if (DoOpt) { - if (!opt(C, ThreadTM.get(), UniqueTaskId, *MPart, /*IsThinLTO=*/true, + if (!opt(C, ThreadTM.get(), PartitionId, *MPart, /*IsThinLTO=*/true, /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, CmdArgs, BitcodeLibFuncs)) { report_fatal_error("Failed to gen opt for split mod in thread."); @@ -654,7 +560,7 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C, // running `opt()`. We're not reaching here as it's bailed out earlier // with `CodeGenOnly` which has been set in `SecondRoundThinBackend`. if (IRAddStream) - cgdata::saveModuleForTwoRounds(*MPart, task + CurrentThreadId, + cgdata::saveModuleForTwoRounds(*MPart, PartitionId, IRAddStream); } @@ -668,91 +574,18 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C, } } - auto splitStream = [&](unsigned task, const Twine &moduleName) - -> Expected<std::unique_ptr<CachedFileStream>> { - int FD; - SmallString<128> TempFilename; - if (std::error_code EC = sys::fs::createTemporaryFile( - "thinlto-split", "o", FD, TempFilename)) - return errorCodeToError(EC); - - TempObjectFiles[PartitionId] = std::string(TempFilename.str()); - TempFileRemovers[PartitionId].setFile(TempObjectFiles[PartitionId]); - - auto OS = std::make_unique<raw_fd_ostream>( - FD, true, /*CloseOnDestruct*/true); - - auto Stream = std::make_unique<CachedFileStream>( - std::move(OS), std::string(TempFilename.str())); - - return std::move(Stream); - }; - - codegen(C, ThreadTM.get(), splitStream, UniqueTaskId, *MPart, + // FIXME: For distributed ThinLTO, the current 'AddStream' callback needs + // to be reconstructed to support emitting multiple split submodules.
+ codegen(C, ThreadTM.get(), AddStream, PartitionId, *MPart, CombinedIndex); }; SplitModuleCG.SplitModule(HandleModulePartition, C); - // Use ld.lld to combine the partitions into a object. - if (TempObjectFiles.empty()) { - llvm::errs() << "TempObjectFiles.empty()\n"; - return true; - } - - auto FinalStream = AddStream(task, Mod.getModuleIdentifier()); - if (!FinalStream) - report_fatal_error("Failed to open final output stream"); - - int MergedFD; - SmallString<128> MergedFilename; - if (sys::fs::createTemporaryFile("thinlto-merged", "o", MergedFD, - MergedFilename)) - report_fatal_error("Failed to create merged temp file."); - llvm::FileRemover MergedFileRemover(MergedFilename); - sys::fs::closeFile(MergedFD); - - std::vector<StringRef> Args; - std::string LinkerPath = ""; - if (auto Path = sys::findProgramByName("ld.lld")) - LinkerPath = *Path; - else if (auto Path = sys::findProgramByName("ld")) - LinkerPath = *Path; - - if (LinkerPath.empty()) - report_fatal_error("Cannot find linkeer (ld or ld.lld) to merge partitions."); - - Args.push_back(LinkerPath); - Args.push_back("-r"); - Args.push_back("-o"); - Args.push_back(MergedFilename); - - for (const auto &File : TempObjectFiles) - Args.push_back(File); - - std::string ErrMsg; - int Result = sys::ExecuteAndWait(LinkerPath, Args, /*Env=*/std::nullopt, - /*Redirects=*/{}, /*SecondsToWait=*/0, - /*MemoryLimit=*/0, &ErrMsg); - - if (Result != 0) { - errs() << "Linker failed: " << ErrMsg << "\n"; - report_fatal_error("Failed to merge split objects."); - } + // TODO: After CodeGen emission, an arbitrary number of split submodules will + // be generated. These fragments need to be merged before the final link + // stage to prevent disruptions to the distributed ThinLTO workflow.
- { - std::unique_ptr<CachedFileStream> &FinalFileStream = *FinalStream; - auto BufferOrErr = MemoryBuffer::getFile(MergedFilename); - if (!BufferOrErr) - report_fatal_error("Failed to read merged object."); - - FinalFileStream->OS->write(BufferOrErr.get()->getBufferStart(), - BufferOrErr.get()->getBufferSize()); - if (Error Err = FinalFileStream->commit()) { - report_fatal_error(Twine("Failed to commit final file stream: ") + - toString(std::move(Err))); - } - } return true; } @@ -920,21 +753,9 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, // the module, if applicable. Mod.setPartialSampleProfileRatio(CombinedIndex); - bool ProfitableToSplit = true; - if (ThinLTOSplit) { - if (!canDoSplitModule(Mod) || !HasLargeCG(Mod, CombinedIndex)) { - ProfitableToSplit = false; - LLVM_DEBUG(dbgs() << "warning: thinlto split not enable for module: " - << Mod.getName()); - } else { - LLVM_DEBUG(dbgs() << "thinlto: split codegen for module: " - << Mod.getName()); - } - } - LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); if (CodeGenOnly) { - if (ThinLTOSplit && ProfitableToSplit) + if (ThinLTOSplit) splitOptAndCodeGenThin(Task, Conf, TM.get(), AddStream, ThinLTOSplitPartitions, Mod, CombinedIndex, CmdArgs, false, IRAddStream, BitcodeLibFuncs); @@ -951,7 +772,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, auto OptimizeAndCodegen = [&](Module &Mod, TargetMachine *TM, LLVMRemarkFileHandle DiagnosticOutputFile) { - if (ThinLTOSplit && ProfitableToSplit) { + if (ThinLTOSplit) { if (!splitOptAndCodeGenThin( Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod, CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs)) diff --git a/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll b/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll deleted file mode 100644 index 6c51141a9ad85..0000000000000 --- a/llvm/test/Transforms/SplitModuleCG/split-promoted-rename.ll +++ /dev/null @@ -1,41 +0,0 @@ -; Test 
that internal symbols promoted during module splitting are consistently -; renamed with an MD5 suffix across all partitions. -; -; RUN: opt -module-summary %s -o %t.bc -; RUN: llvm-lto2 run %t.bc -o %t \ -; RUN: -thinlto-split=true \ -; RUN: -thinlto-split-partitions=2 -thinlto-split-module-size-threshold=0 \ -; RUN: -r=%t.bc,caller_a,px \ -; RUN: -r=%t.bc,caller_b,px -; RUN: llvm-nm %t.1 | FileCheck %s - -; CHECK-DAG: T caller_a -; CHECK-DAG: T caller_b -; CHECK: T {{.*promoted_internal[._][0-9a-f]+.*}} -; CHECK-NOT: T promoted_internal{{$}} - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; @promoted_internal is internal. SplitModuleCG::dealWithMpart's checkPromoted -; records it in PromotedRenames. splitOptAndCodeGenThin applies the rename -; after opt via: -; for (auto &GV : MPart->global_values()) -; if (auto It = PromotedRenames.find(GV.getName()); ...) -; GV.setName(It->second); -define internal void @promoted_internal() { -entry: - ret void -} - -define void @caller_a() { -entry: - call void @promoted_internal() - ret void -} - -define void @caller_b() { -entry: - call void @promoted_internal() - ret void -} >From 1c3b0b0271043d2d53fc44b201c54132707c834c Mon Sep 17 00:00:00 2001 From: maojiaping <[email protected]> Date: Thu, 11 Jun 2026 10:15:49 +0800 Subject: [PATCH 6/6] [LTO][SplitModuleCG] Enable split module by callgraph for FullLTO - Rename ThinLTOSplit to LTOSplitByCG for clarity - Add IsThinLTO parameter to splitOptAndCodeGenThin with default true - Enable splitOptAndCodeGenThin for FullLTO via else if branch --- .../thinlto-split/fulllto-split-module.c | 26 +++++++++++ .../thinlto-split/thinlto-split-module.c | 34 ++++++++++++++ llvm/lib/LTO/LTOBackend.cpp | 46 +++++++++++-------- 3 files changed, 87 insertions(+), 19 deletions(-) create mode 100644 clang/test/CodeGen/thinlto-split/fulllto-split-module.c create mode 100644
clang/test/CodeGen/thinlto-split/thinlto-split-module.c diff --git a/clang/test/CodeGen/thinlto-split/fulllto-split-module.c b/clang/test/CodeGen/thinlto-split/fulllto-split-module.c new file mode 100644 index 0000000000000..b3cf7081ee2e0 --- /dev/null +++ b/clang/test/CodeGen/thinlto-split/fulllto-split-module.c @@ -0,0 +1,26 @@ +// UNSUPPORTED: system-windows +// REQUIRES: aarch64-registered-target + +// RUN: %clang -flto=full -fuse-ld=lld -shared \ +// RUN: -o %t.o %s \ +// RUN: -Wl,-mllvm,-lto-split-by-callgraph=true \ +// RUN: -Wl,--lto-partitions=2 \ +// RUN: -Wl,--save-temps=prelink +// RUN: llvm-nm %t.o.lto.o | FileCheck %s --check-prefix=CHECK0 +// RUN: llvm-nm %t.o.lto.1.o | FileCheck %s --check-prefix=CHECK1 + +// CHECK0-DAG: T caller_b +// CHECK0-DAG: T promoted_internal + +// CHECK1-DAG: T caller_a +// CHECK1-DAG: U promoted_internal + +static void promoted_internal(void) {} + +void caller_a(void) { + promoted_internal(); +} + +void caller_b(void) { + promoted_internal(); +} \ No newline at end of file diff --git a/clang/test/CodeGen/thinlto-split/thinlto-split-module.c b/clang/test/CodeGen/thinlto-split/thinlto-split-module.c new file mode 100644 index 0000000000000..0725fe49f3e6c --- /dev/null +++ b/clang/test/CodeGen/thinlto-split/thinlto-split-module.c @@ -0,0 +1,34 @@ +// UNSUPPORTED: system-windows +// REQUIRES: aarch64-registered-target + +// Distributed ThinLTO (DTLTO) +// RUN: %clang -flto=thin -c %s -o %t.o +// RUN: %clang -flto=thin -fuse-ld=lld -Wl,--thinlto-index-only %t.o +// RUN: not --crash %clang %t.o -c -fthinlto-index=%t.o.thinlto.bc \ +// RUN: -mllvm -lto-split-by-callgraph=true \ +// RUN: -mllvm -lto-split-partitions=2 +// +// Regular ThinLTO +// RUN: %clang -flto=thin -fuse-ld=lld -shared \ +// RUN: -o %t.o %s \ +// RUN: -Wl,-mllvm,-lto-split-by-callgraph=true \ +// RUN: -Wl,-mllvm,-lto-split-partitions=2 \ +// RUN: -Wl,--save-temps=prelink +// RUN: llvm-nm %t.o.lto.o | FileCheck %s --check-prefix=CHECK0 +// RUN: llvm-nm 
%t.o.lto.1.o | FileCheck %s --check-prefix=CHECK1 + +// CHECK0-DAG: T caller_b +// CHECK0-DAG: T {{promoted_internal[.][0-9a-f]+}} + +// CHECK1-DAG: T caller_a +// CHECK1-DAG: U {{promoted_internal[.][0-9a-f]+}} + +static void promoted_internal(void) {} + +void caller_a(void) { + promoted_internal(); +} + +void caller_b(void) { + promoted_internal(); +} \ No newline at end of file diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 2608e2eb54398..33182a96283c9 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -84,12 +84,12 @@ static cl::list<std::string> "path matches this for -save-temps options"), cl::CommaSeparated, cl::Hidden); -static cl::opt<unsigned> ThinLTOSplitPartitions( - "thinlto-split-partitions", cl::Hidden, cl::init(0), - cl::desc("Control split to how many partitions in thinlto backend.")); +static cl::opt<unsigned> LTOSplitPartitions( + "lto-split-partitions", cl::Hidden, cl::init(0), + cl::desc("Control split to how many partitions in lto backend.")); -static cl::opt<bool> ThinLTOSplit("thinlto-split", cl::init(false), - cl::desc("Enable split module in thinlto backend.")); +static cl::opt<bool> LTOSplitByCG("lto-split-by-callgraph", cl::init(false), + cl::desc("Enable split module in lto backend.")); namespace llvm { extern cl::opt<bool> NoPGOWarnMismatch; @@ -146,7 +146,7 @@ Error Config::addSaveTemps(std::string OutputFileName, bool UseInputModulePath, // named from the provided OutputFileName with the Task ID appended. 
if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { PathPrefix = OutputFileName; - if (ThinLTOSplit) + if (LTOSplitByCG) PathPrefix += extract_filename(M.getSourceFileName()) + "."; if (Task != (unsigned)-1) PathPrefix += utostr(Task) + "."; @@ -538,7 +538,8 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C, const ModuleSummaryIndex &CombinedIndex, const std::vector<uint8_t> &CmdArgs, bool DoOpt, AddStreamFn IRAddStream, - ArrayRef<StringRef> &BitcodeLibFuncs) { + ArrayRef<StringRef> &BitcodeLibFuncs, + bool IsThinLTO = true) { const Target *T = &TM->getTarget(); SplitModuleCG SplitModuleCG(Mod, CombinedIndex, ParallelCodeGenParallelismLevel); @@ -563,14 +564,16 @@ static bool splitOptAndCodeGenThin(unsigned task, const Config &C, cgdata::saveModuleForTwoRounds(*MPart, PartitionId, IRAddStream); } - - // Rename the GlobalValues whose internal is changed to external. That's - // can avoid duplicate symbols. - auto PromotedRenames = SplitModuleCG.getPromotedRenames(); - for (auto &GV : MPart->global_values()) { - if (auto It = PromotedRenames.find(GV.getName()); - It != PromotedRenames.end()) { - GV.setName(It->second); + + if (IsThinLTO) { + // Rename the GlobalValues whose internal is changed to external. This + // avoids duplicate symbols in ThinLTO.
+ auto PromotedRenames = SplitModuleCG.getPromotedRenames(); + for (auto &GV : MPart->global_values()) { + if (auto It = PromotedRenames.find(GV.getName()); + It != PromotedRenames.end()) { + GV.setName(It->second); + } } } @@ -690,6 +693,11 @@ Error lto::backend(const Config &C, AddStreamFn AddStream, if (ParallelCodeGenParallelismLevel == 1) { codegen(C, TM.get(), AddStream, 0, Mod, CombinedIndex); + } else if (LTOSplitByCG) { + splitOptAndCodeGenThin(/*Task*/0, C, TM.get(), AddStream, + ParallelCodeGenParallelismLevel, Mod, CombinedIndex, + /*CmdArgs*/ std::vector<uint8_t>(), /*DoOpt*/false, + AddStreamFn(), BitcodeLibFuncs, false); } else { splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod, CombinedIndex); @@ -755,9 +763,9 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); if (CodeGenOnly) { - if (ThinLTOSplit) + if (LTOSplitByCG) splitOptAndCodeGenThin(Task, Conf, TM.get(), AddStream, - ThinLTOSplitPartitions, Mod, CombinedIndex, + LTOSplitPartitions, Mod, CombinedIndex, CmdArgs, false, IRAddStream, BitcodeLibFuncs); else // If CodeGenOnly is set, we only perform code generation and skip @@ -772,9 +780,9 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream, auto OptimizeAndCodegen = [&](Module &Mod, TargetMachine *TM, LLVMRemarkFileHandle DiagnosticOutputFile) { - if (ThinLTOSplit) { + if (LTOSplitByCG) { if (!splitOptAndCodeGenThin( - Task, Conf, TM, AddStream, ThinLTOSplitPartitions, Mod, + Task, Conf, TM, AddStream, LTOSplitPartitions, Mod, CombinedIndex, CmdArgs, true, IRAddStream, BitcodeLibFuncs)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); } else { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
