https://github.com/jmmartinez updated https://github.com/llvm/llvm-project/pull/162660
From 1271525b661eb47828fb40d04a05e772840326e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Thu, 9 Oct 2025 14:57:56 +0200 Subject: [PATCH 01/12] Pre-Commit test: [LLVM] Maintain element type of @llvm.compiler.used/@llvm.used if it already exists This new test fails with: /home/juamarti/llvm/_llvm/llvm/lib/IR/Constants.cpp:1327: static Constant *llvm::ConstantArray::getImpl(ArrayType *, ArrayRef<Constant *>): Assertion `C->getType() == Ty->getElementType() && "Wrong type in array element initializer"' failed. --- .../Transforms/Utils/ModuleUtilsTest.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp index d4094c5307060..0cc408af43bc5 100644 --- a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp @@ -69,6 +69,23 @@ TEST(ModuleUtils, AppendToUsedList2) { EXPECT_EQ(1, getListSize(*M, "llvm.used")); } +TEST(ModuleUtils, AppendToUsedList3) { + LLVMContext C; + + std::unique_ptr<Module> M = parseIR(C, R"( + @x = addrspace(1) global [2 x i32] zeroinitializer, align 4 + @y = addrspace(2) global [2 x i32] zeroinitializer, align 4 + @llvm.compiler.used = appending global [1 x ptr addrspace (3)] [ptr addrspace(3) addrspacecast (ptr addrspace (1) @x to ptr addrspace(3))] + )"); + GlobalVariable *X = M->getNamedGlobal("x"); + GlobalVariable *Y = M->getNamedGlobal("y"); + EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); + appendToCompilerUsed(*M, X); + EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); + appendToCompilerUsed(*M, Y); + EXPECT_EQ(2, getListSize(*M, "llvm.compiler.used")); +} + using AppendFnType = decltype(&appendToGlobalCtors); using TransformFnType = decltype(&transformGlobalCtors); using ParamType = std::tuple<StringRef, AppendFnType, TransformFnType>; From f2abc6ecd0d908d0f54f001477034de60ee80ccd Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Thu, 9 Oct 2025 14:34:32 +0200 Subject: [PATCH 02/12] [LLVM] Maintain element type of @llvm.compiler.used/@llvm.used if it already exists At the moment, the pointer type stored in the llvm.compiler.used/llvm.used is not consistently defined. The frontend uses a pointer to the default address space (which may not be 0; for example, it is 4 for SPIRV). This patch makes `appendToUsed/appendToCompilerUsed` match the behaviour in BitcodeWriter.cpp: if the variable already exists, preserve its element type, otherwise use `ptr addrspace(0)`. This fixes the following error in the newly added test: UtilsTests: /home/juamarti/llvm/_llvm/llvm/lib/IR/Constants.cpp:1327: static Constant *llvm::ConstantArray::getImpl(ArrayType *, ArrayRef<Constant *>): Assertion `C->getType() == Ty->getElementType() && "Wrong type in array element initializer"' failed. --- llvm/lib/Transforms/Utils/ModuleUtils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 596849ecab742..d1acb0ff1ad6b 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -138,10 +138,11 @@ static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> SmallSetVector<Constant *, 16> Init; collectUsedGlobals(GV, Init); + Type *ArrayEltTy = GV ? 
GV->getValueType()->getArrayElementType() + : PointerType::getUnqual(M.getContext()); if (GV) GV->eraseFromParent(); - Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext()); for (auto *V : Values) Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); From bad3df5de143f3d6dc5c8fd912cc08a0df08ab4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Thu, 9 Oct 2025 14:34:24 +0200 Subject: [PATCH 03/12] [NFC][BitcodeWriter] Use appendToCompilerUsed instead of custom implementation --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 70 ++++++++++------------- llvm/lib/Bitcode/Writer/CMakeLists.txt | 1 + 2 files changed, 31 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index f17656c7c3b03..c9f92bee4caa6 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -75,6 +75,7 @@ #include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -5865,25 +5866,25 @@ static const char *getSectionNameForCommandline(const Triple &T) { void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, bool EmbedBitcode, bool EmbedCmdline, const std::vector<uint8_t> &CmdArgs) { - // Save llvm.compiler.used and remove it. - SmallVector<Constant *, 2> UsedArray; - SmallVector<GlobalValue *, 4> UsedGlobals; - GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true); - Type *UsedElementType = Used ? 
Used->getValueType()->getArrayElementType() - : PointerType::getUnqual(M.getContext()); - for (auto *GV : UsedGlobals) { - if (GV->getName() != "llvm.embedded.module" && - GV->getName() != "llvm.cmdline") - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); - } - if (Used) - Used->eraseFromParent(); // Embed the bitcode for the llvm module. std::string Data; ArrayRef<uint8_t> ModuleData; Triple T(M.getTargetTriple()); + SmallVector<GlobalValue *, 2> NewGlobals; + + auto IsCmdOrBitcode = [&](Constant *C) { + GlobalVariable *GV = dyn_cast<GlobalVariable>(C); + StringRef Name = GV ? GV->getName() : ""; + if (EmbedBitcode && Name == "llvm.embedded.module") + return true; + if (EmbedCmdline && Name == "llvm.cmdline") + return true; + return false; + }; + + if (EmbedBitcode || EmbedCmdline) + removeFromUsedLists(M, IsCmdOrBitcode); if (EmbedBitcode) { if (Buf.getBufferSize() == 0 || @@ -5902,23 +5903,22 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, } llvm::Constant *ModuleConstant = llvm::ConstantDataArray::get(M.getContext(), ModuleData); - llvm::GlobalVariable *GV = new llvm::GlobalVariable( + llvm::GlobalVariable *EmbeddedModule = new llvm::GlobalVariable( M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, ModuleConstant); - GV->setSection(getSectionNameForBitcode(T)); + EmbeddedModule->setSection(getSectionNameForBitcode(T)); // Set alignment to 1 to prevent padding between two contributions from input // sections after linking. 
- GV->setAlignment(Align(1)); - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + EmbeddedModule->setAlignment(Align(1)); + NewGlobals.push_back(EmbeddedModule); if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.embedded.module", true)) { assert(Old->hasZeroLiveUses() && "llvm.embedded.module can only be used once in llvm.compiler.used"); - GV->takeName(Old); + EmbeddedModule->takeName(Old); Old->eraseFromParent(); } else { - GV->setName("llvm.embedded.module"); + EmbeddedModule->setName("llvm.embedded.module"); } // Skip if only bitcode needs to be embedded. @@ -5928,30 +5928,20 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, CmdArgs.size()); llvm::Constant *CmdConstant = llvm::ConstantDataArray::get(M.getContext(), CmdData); - GV = new llvm::GlobalVariable(M, CmdConstant->getType(), true, - llvm::GlobalValue::PrivateLinkage, - CmdConstant); - GV->setSection(getSectionNameForCommandline(T)); - GV->setAlignment(Align(1)); - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + GlobalVariable *CmdLine = new llvm::GlobalVariable( + M, CmdConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, + CmdConstant); + CmdLine->setSection(getSectionNameForCommandline(T)); + CmdLine->setAlignment(Align(1)); if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) { assert(Old->hasZeroLiveUses() && "llvm.cmdline can only be used once in llvm.compiler.used"); - GV->takeName(Old); + CmdLine->takeName(Old); Old->eraseFromParent(); } else { - GV->setName("llvm.cmdline"); + CmdLine->setName("llvm.cmdline"); } + NewGlobals.push_back(CmdLine); + appendToCompilerUsed(M, NewGlobals); } - - if (UsedArray.empty()) - return; - - // Recreate llvm.compiler.used. 
- ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); - auto *NewUsed = new GlobalVariable( - M, ATy, false, llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); - NewUsed->setSection("llvm.metadata"); } diff --git a/llvm/lib/Bitcode/Writer/CMakeLists.txt b/llvm/lib/Bitcode/Writer/CMakeLists.txt index 2c508ca9fae95..5bbb872a90341 100644 --- a/llvm/lib/Bitcode/Writer/CMakeLists.txt +++ b/llvm/lib/Bitcode/Writer/CMakeLists.txt @@ -15,4 +15,5 @@ add_llvm_component_library(LLVMBitWriter ProfileData Support TargetParser + TransformUtils ) From 5279d3be4f8506b074746deab9cfa11944e51349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Fri, 10 Oct 2025 09:52:55 +0200 Subject: [PATCH 04/12] [Review] Remove lambda from embedBitcodeInModule --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index c9f92bee4caa6..6509f39418bb5 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -5863,6 +5863,12 @@ static const char *getSectionNameForCommandline(const Triple &T) { llvm_unreachable("Unimplemented ObjectFormatType"); } +static auto globalInUsedHasName(StringRef Name) { + return [Name](Constant* C) { + return C->getName() == Name; + }; +} + void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, bool EmbedBitcode, bool EmbedCmdline, const std::vector<uint8_t> &CmdArgs) { @@ -5873,19 +5879,6 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, Triple T(M.getTargetTriple()); SmallVector<GlobalValue *, 2> NewGlobals; - auto IsCmdOrBitcode = [&](Constant *C) { - GlobalVariable *GV = dyn_cast<GlobalVariable>(C); - StringRef Name = GV ? 
GV->getName() : ""; - if (EmbedBitcode && Name == "llvm.embedded.module") - return true; - if (EmbedCmdline && Name == "llvm.cmdline") - return true; - return false; - }; - - if (EmbedBitcode || EmbedCmdline) - removeFromUsedLists(M, IsCmdOrBitcode); - if (EmbedBitcode) { if (Buf.getBufferSize() == 0 || !isBitcode((const unsigned char *)Buf.getBufferStart(), @@ -5913,6 +5906,7 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, NewGlobals.push_back(EmbeddedModule); if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.embedded.module", true)) { + removeFromUsedLists(M, globalInUsedHasName("llvm.embedded.module")); assert(Old->hasZeroLiveUses() && "llvm.embedded.module can only be used once in llvm.compiler.used"); EmbeddedModule->takeName(Old); @@ -5934,6 +5928,7 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, CmdLine->setSection(getSectionNameForCommandline(T)); CmdLine->setAlignment(Align(1)); if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) { + removeFromUsedLists(M, globalInUsedHasName("llvm.cmdline")); assert(Old->hasZeroLiveUses() && "llvm.cmdline can only be used once in llvm.compiler.used"); CmdLine->takeName(Old); From 7691a6f2ffc06895b5041d8049e054a8147ebae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Fri, 10 Oct 2025 09:54:55 +0200 Subject: [PATCH 05/12] [Review] 'addrspace (n)' -> 'addrspace(n)' --- llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp index 0cc408af43bc5..cbdb4dcbc9a6f 100644 --- a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp @@ -75,7 +75,7 @@ TEST(ModuleUtils, AppendToUsedList3) { std::unique_ptr<Module> M = parseIR(C, R"( @x = addrspace(1) global [2 x 
i32] zeroinitializer, align 4 @y = addrspace(2) global [2 x i32] zeroinitializer, align 4 - @llvm.compiler.used = appending global [1 x ptr addrspace (3)] [ptr addrspace(3) addrspacecast (ptr addrspace (1) @x to ptr addrspace(3))] + @llvm.compiler.used = appending global [1 x ptr addrspace(3)] [ptr addrspace(3) addrspacecast (ptr addrspace(1) @x to ptr addrspace(3))] )"); GlobalVariable *X = M->getNamedGlobal("x"); GlobalVariable *Y = M->getNamedGlobal("y"); From 68d30f5d7107effcebb5702d4127fa59ba35f53e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Fri, 10 Oct 2025 10:40:58 +0200 Subject: [PATCH 06/12] [Review] Forgot again to run clang-format --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 6509f39418bb5..8a132c84265f2 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -5864,9 +5864,7 @@ static const char *getSectionNameForCommandline(const Triple &T) { } static auto globalInUsedHasName(StringRef Name) { - return [Name](Constant* C) { - return C->getName() == Name; - }; + return [Name](Constant *C) { return C->getName() == Name; }; } void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, From 57690b1dde345f5cda3ca95a25005575b19900ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Thu, 16 Oct 2025 09:51:00 +0200 Subject: [PATCH 07/12] [Review] Move llvm.used/llvm.compiler.used helpers from ModuleUtils to GlobalValue; and remove dependency between BitcodeWriter & TransformUtils --- llvm/include/llvm/IR/GlobalValue.h | 12 +++ .../llvm/Transforms/Utils/ModuleUtils.h | 12 --- llvm/lib/Bitcode/Writer/CMakeLists.txt | 1 - llvm/lib/CodeGen/JMCInstrumenter.cpp | 1 - llvm/lib/IR/Globals.cpp | 78 +++++++++++++++++++ 
.../Target/AMDGPU/AMDGPUCtorDtorLowering.cpp | 1 - .../Target/NVPTX/NVPTXCtorDtorLowering.cpp | 1 - llvm/lib/Transforms/Utils/ModuleUtils.cpp | 77 ------------------ .../Utils/SampleProfileLoaderBaseUtil.cpp | 1 - 9 files changed, 90 insertions(+), 94 deletions(-) diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h index 83e695cdd27d9..8f6726bd99335 100644 --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -684,6 +684,18 @@ class GlobalValue : public Constant { LLVM_ABI bool canBeOmittedFromSymbolTable() const; }; +/// Adds global values to the llvm.used list. +LLVM_ABI void appendToUsed(Module &M, ArrayRef<GlobalValue *> Values); + +/// Adds global values to the llvm.compiler.used list. +LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values); + +/// Removes global values from the llvm.used and llvm.compiler.used arrays. \p +/// ShouldRemove should return true for any initializer field that should not be +/// included in the replacement global. +LLVM_ABI void removeFromUsedLists(Module &M, + function_ref<bool(Constant *)> ShouldRemove); + } // end namespace llvm #endif // LLVM_IR_GLOBALVALUE_H diff --git a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h index 4036c4e947c75..98bb4579f3a6c 100644 --- a/llvm/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/llvm/include/llvm/Transforms/Utils/ModuleUtils.h @@ -96,18 +96,6 @@ getOrCreateSanitizerCtorAndInitFunctions( /// the list of public globals in the module. LLVM_ABI bool nameUnamedGlobals(Module &M); -/// Adds global values to the llvm.used list. -LLVM_ABI void appendToUsed(Module &M, ArrayRef<GlobalValue *> Values); - -/// Adds global values to the llvm.compiler.used list. -LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values); - -/// Removes global values from the llvm.used and llvm.compiler.used arrays. 
\p -/// ShouldRemove should return true for any initializer field that should not be -/// included in the replacement global. -LLVM_ABI void removeFromUsedLists(Module &M, - function_ref<bool(Constant *)> ShouldRemove); - /// Filter out potentially dead comdat functions where other entries keep the /// entire comdat group alive. /// diff --git a/llvm/lib/Bitcode/Writer/CMakeLists.txt b/llvm/lib/Bitcode/Writer/CMakeLists.txt index 5bbb872a90341..2c508ca9fae95 100644 --- a/llvm/lib/Bitcode/Writer/CMakeLists.txt +++ b/llvm/lib/Bitcode/Writer/CMakeLists.txt @@ -15,5 +15,4 @@ add_llvm_component_library(LLVMBitWriter ProfileData Support TargetParser - TransformUtils ) diff --git a/llvm/lib/CodeGen/JMCInstrumenter.cpp b/llvm/lib/CodeGen/JMCInstrumenter.cpp index e2aaebedf5a4f..b1c05294ac4b5 100644 --- a/llvm/lib/CodeGen/JMCInstrumenter.cpp +++ b/llvm/lib/CodeGen/JMCInstrumenter.cpp @@ -36,7 +36,6 @@ #include "llvm/Pass.h" #include "llvm/Support/DJB.h" #include "llvm/Support/Path.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index c3a472b0cc66d..609c463d95d40 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "LLVMContextImpl.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -671,3 +672,80 @@ void GlobalIFunc::applyAlongResolverPath( DenseSet<const GlobalAlias *> Aliases; findBaseObject(getResolver(), Aliases, Op); } + +static void collectUsedGlobals(GlobalVariable *GV, + SmallSetVector<Constant *, 16> &Init) { + if (!GV || !GV->hasInitializer()) + return; + + auto *CA = cast<ConstantArray>(GV->getInitializer()); + for (Use &Op : CA->operands()) + Init.insert(cast<Constant>(Op)); +} + +static void appendToUsedList(Module &M, StringRef Name, + ArrayRef<GlobalValue *> Values) { 
+ GlobalVariable *GV = M.getGlobalVariable(Name); + + SmallSetVector<Constant *, 16> Init; + collectUsedGlobals(GV, Init); + Type *ArrayEltTy = GV ? GV->getValueType()->getArrayElementType() + : PointerType::getUnqual(M.getContext()); + if (GV) + GV->eraseFromParent(); + + for (auto *V : Values) + Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); + + if (Init.empty()) + return; + + ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); + GV = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, + ConstantArray::get(ATy, Init.getArrayRef()), Name); + GV->setSection("llvm.metadata"); +} + +void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { + appendToUsedList(M, "llvm.used", Values); +} + +void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { + appendToUsedList(M, "llvm.compiler.used", Values); +} + +static void removeFromUsedList(Module &M, StringRef Name, + function_ref<bool(Constant *)> ShouldRemove) { + GlobalVariable *GV = M.getNamedGlobal(Name); + if (!GV) + return; + + SmallSetVector<Constant *, 16> Init; + collectUsedGlobals(GV, Init); + + Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); + + SmallVector<Constant *, 16> NewInit; + for (Constant *MaybeRemoved : Init) { + if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) + NewInit.push_back(MaybeRemoved); + } + + if (!NewInit.empty()) { + ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); + GlobalVariable *NewGV = + new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, + ConstantArray::get(ATy, NewInit), "", GV, + GV->getThreadLocalMode(), GV->getAddressSpace()); + NewGV->setSection(GV->getSection()); + NewGV->takeName(GV); + } + + GV->eraseFromParent(); +} + +void llvm::removeFromUsedLists(Module &M, + function_ref<bool(Constant *)> ShouldRemove) { + removeFromUsedList(M, "llvm.used", ShouldRemove); + removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); +} diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp index a774ad53b5bed..bd1727571f41a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp index 4e069398d540f..18618ca19beee 100644 --- a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp @@ -24,7 +24,6 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MD5.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index d1acb0ff1ad6b..81ddcf06c49d2 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -123,83 +123,6 @@ void llvm::transformGlobalDtors(Module &M, const GlobalCtorTransformFn &Fn) { transformGlobalArray("llvm.global_dtors", M, Fn); } -static void collectUsedGlobals(GlobalVariable *GV, - SmallSetVector<Constant *, 16> &Init) { - if (!GV || !GV->hasInitializer()) - return; - - auto *CA = cast<ConstantArray>(GV->getInitializer()); - for (Use &Op : CA->operands()) - Init.insert(cast<Constant>(Op)); -} - -static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { - GlobalVariable *GV = M.getGlobalVariable(Name); - - SmallSetVector<Constant *, 16> Init; - collectUsedGlobals(GV, Init); - Type *ArrayEltTy = GV ? 
GV->getValueType()->getArrayElementType() - : PointerType::getUnqual(M.getContext()); - if (GV) - GV->eraseFromParent(); - - for (auto *V : Values) - Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); - - if (Init.empty()) - return; - - ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); - GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, - ConstantArray::get(ATy, Init.getArrayRef()), - Name); - GV->setSection("llvm.metadata"); -} - -void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { - appendToUsedList(M, "llvm.used", Values); -} - -void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { - appendToUsedList(M, "llvm.compiler.used", Values); -} - -static void removeFromUsedList(Module &M, StringRef Name, - function_ref<bool(Constant *)> ShouldRemove) { - GlobalVariable *GV = M.getNamedGlobal(Name); - if (!GV) - return; - - SmallSetVector<Constant *, 16> Init; - collectUsedGlobals(GV, Init); - - Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); - - SmallVector<Constant *, 16> NewInit; - for (Constant *MaybeRemoved : Init) { - if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) - NewInit.push_back(MaybeRemoved); - } - - if (!NewInit.empty()) { - ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); - GlobalVariable *NewGV = - new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, - ConstantArray::get(ATy, NewInit), "", GV, - GV->getThreadLocalMode(), GV->getAddressSpace()); - NewGV->setSection(GV->getSection()); - NewGV->takeName(GV); - } - - GV->eraseFromParent(); -} - -void llvm::removeFromUsedLists(Module &M, - function_ref<bool(Constant *)> ShouldRemove) { - removeFromUsedList(M, "llvm.used", ShouldRemove); - removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); -} - void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { if (!M.getModuleFlag("kcfi")) return; diff --git 
a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp index f7ae6ad844948..324f6194a2d72 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp @@ -14,7 +14,6 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" namespace llvm { From 89d2468e97fbd17c0ec72d90042b07c37b54f690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Thu, 16 Oct 2025 11:08:26 +0200 Subject: [PATCH 08/12] [Review] forgot to move the unittests --- llvm/unittests/IR/CMakeLists.txt | 1 + llvm/unittests/IR/UsedGlobalTest.cpp | 82 +++++++++++++++++++ .../Transforms/Utils/ModuleUtilsTest.cpp | 53 ------------ 3 files changed, 83 insertions(+), 53 deletions(-) create mode 100644 llvm/unittests/IR/UsedGlobalTest.cpp diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index d62ce66ef9d34..1e7f0932dd989 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -49,6 +49,7 @@ add_llvm_unittest(IRTests TypesTest.cpp UseTest.cpp UserTest.cpp + UsedGlobalTest.cpp ValueHandleTest.cpp ValueMapTest.cpp ValueTest.cpp diff --git a/llvm/unittests/IR/UsedGlobalTest.cpp b/llvm/unittests/IR/UsedGlobalTest.cpp new file mode 100644 index 0000000000000..23cedc6f774f9 --- /dev/null +++ b/llvm/unittests/IR/UsedGlobalTest.cpp @@ -0,0 +1,82 @@ +//===- UsedGlobalTest.cpp - Unit tests for Module utility ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" + +using namespace llvm; + +static std::unique_ptr<Module> parseIR(LLVMContext &C, StringRef IR) { + SMDiagnostic Err; + std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C); + if (!Mod) + Err.print("UsedGlobalTest", errs()); + return Mod; +} + +static int getListSize(Module &M, StringRef Name) { + auto *List = M.getGlobalVariable(Name); + if (!List) + return 0; + auto *T = cast<ArrayType>(List->getValueType()); + return T->getNumElements(); +} + +TEST(UsedGlobal, AppendToUsedList1) { + LLVMContext C; + + std::unique_ptr<Module> M = parseIR( + C, R"(@x = addrspace(4) global [2 x i32] zeroinitializer, align 4)"); + SmallVector<GlobalValue *, 2> Globals; + for (auto &G : M->globals()) { + Globals.push_back(&G); + } + EXPECT_EQ(0, getListSize(*M, "llvm.compiler.used")); + appendToCompilerUsed(*M, Globals); + EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); + + EXPECT_EQ(0, getListSize(*M, "llvm.used")); + appendToUsed(*M, Globals); + EXPECT_EQ(1, getListSize(*M, "llvm.used")); +} + +TEST(UsedGlobal, AppendToUsedList2) { + LLVMContext C; + + std::unique_ptr<Module> M = + parseIR(C, R"(@x = global [2 x i32] zeroinitializer, align 4)"); + SmallVector<GlobalValue *, 2> Globals; + for (auto &G : M->globals()) { + Globals.push_back(&G); + } + EXPECT_EQ(0, getListSize(*M, "llvm.compiler.used")); + appendToCompilerUsed(*M, Globals); + EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); + + EXPECT_EQ(0, getListSize(*M, "llvm.used")); + appendToUsed(*M, Globals); + EXPECT_EQ(1, getListSize(*M, "llvm.used")); +} + +TEST(UsedGlobal, AppendToUsedList3) { + LLVMContext C; + + std::unique_ptr<Module> M = parseIR(C, R"( + @x = addrspace(1) global [2 x i32] zeroinitializer, align 4 + @y = addrspace(2) 
global [2 x i32] zeroinitializer, align 4 + @llvm.compiler.used = appending global [1 x ptr addrspace(3)] [ptr addrspace(3) addrspacecast (ptr addrspace(1) @x to ptr addrspace(3))] + )"); + GlobalVariable *X = M->getNamedGlobal("x"); + GlobalVariable *Y = M->getNamedGlobal("y"); + EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); + appendToCompilerUsed(*M, X); + EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); + appendToCompilerUsed(*M, Y); + EXPECT_EQ(2, getListSize(*M, "llvm.compiler.used")); +} diff --git a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp index cbdb4dcbc9a6f..f1c984717096f 100644 --- a/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp @@ -33,59 +33,6 @@ static int getListSize(Module &M, StringRef Name) { return T->getNumElements(); } -TEST(ModuleUtils, AppendToUsedList1) { - LLVMContext C; - - std::unique_ptr<Module> M = parseIR( - C, R"(@x = addrspace(4) global [2 x i32] zeroinitializer, align 4)"); - SmallVector<GlobalValue *, 2> Globals; - for (auto &G : M->globals()) { - Globals.push_back(&G); - } - EXPECT_EQ(0, getListSize(*M, "llvm.compiler.used")); - appendToCompilerUsed(*M, Globals); - EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); - - EXPECT_EQ(0, getListSize(*M, "llvm.used")); - appendToUsed(*M, Globals); - EXPECT_EQ(1, getListSize(*M, "llvm.used")); -} - -TEST(ModuleUtils, AppendToUsedList2) { - LLVMContext C; - - std::unique_ptr<Module> M = - parseIR(C, R"(@x = global [2 x i32] zeroinitializer, align 4)"); - SmallVector<GlobalValue *, 2> Globals; - for (auto &G : M->globals()) { - Globals.push_back(&G); - } - EXPECT_EQ(0, getListSize(*M, "llvm.compiler.used")); - appendToCompilerUsed(*M, Globals); - EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); - - EXPECT_EQ(0, getListSize(*M, "llvm.used")); - appendToUsed(*M, Globals); - EXPECT_EQ(1, getListSize(*M, "llvm.used")); -} - -TEST(ModuleUtils, 
AppendToUsedList3) { - LLVMContext C; - - std::unique_ptr<Module> M = parseIR(C, R"( - @x = addrspace(1) global [2 x i32] zeroinitializer, align 4 - @y = addrspace(2) global [2 x i32] zeroinitializer, align 4 - @llvm.compiler.used = appending global [1 x ptr addrspace(3)] [ptr addrspace(3) addrspacecast (ptr addrspace(1) @x to ptr addrspace(3))] - )"); - GlobalVariable *X = M->getNamedGlobal("x"); - GlobalVariable *Y = M->getNamedGlobal("y"); - EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); - appendToCompilerUsed(*M, X); - EXPECT_EQ(1, getListSize(*M, "llvm.compiler.used")); - appendToCompilerUsed(*M, Y); - EXPECT_EQ(2, getListSize(*M, "llvm.compiler.used")); -} - using AppendFnType = decltype(&appendToGlobalCtors); using TransformFnType = decltype(&transformGlobalCtors); using ParamType = std::tuple<StringRef, AppendFnType, TransformFnType>; From 1b7c0600db33ead3e67545684a4472be42c4a632 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Tue, 21 Oct 2025 16:39:00 +0200 Subject: [PATCH 09/12] [Review] Inline lambda with reference to string, to a trivial version --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 8a132c84265f2..069886c8bd6df 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -5863,10 +5863,6 @@ static const char *getSectionNameForCommandline(const Triple &T) { llvm_unreachable("Unimplemented ObjectFormatType"); } -static auto globalInUsedHasName(StringRef Name) { - return [Name](Constant *C) { return C->getName() == Name; }; -} - void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, bool EmbedBitcode, bool EmbedCmdline, const std::vector<uint8_t> &CmdArgs) { @@ -5904,7 +5900,8 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef 
Buf, NewGlobals.push_back(EmbeddedModule); if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.embedded.module", true)) { - removeFromUsedLists(M, globalInUsedHasName("llvm.embedded.module")); + removeFromUsedLists( + M, [](Constant *C) { return C->getName() == "llvm.embedded.module"; }); assert(Old->hasZeroLiveUses() && "llvm.embedded.module can only be used once in llvm.compiler.used"); EmbeddedModule->takeName(Old); @@ -5926,7 +5923,8 @@ void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, CmdLine->setSection(getSectionNameForCommandline(T)); CmdLine->setAlignment(Align(1)); if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) { - removeFromUsedLists(M, globalInUsedHasName("llvm.cmdline")); + removeFromUsedLists( + M, [](Constant *C) { return C->getName() == "llvm.cmdline"; }); assert(Old->hasZeroLiveUses() && "llvm.cmdline can only be used once in llvm.compiler.used"); CmdLine->takeName(Old); From 6005afe88228041c48ac749657dc3984ff299906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Mon, 3 Nov 2025 16:23:40 +0100 Subject: [PATCH 10/12] [Update] Use helpers in Clang's side --- clang/lib/CodeGen/CodeGenModule.cpp | 40 ++++++++++------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index af5be95aec1cd..8c13e63cfe802 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -59,6 +59,8 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" @@ -72,6 +74,7 @@ #include "llvm/TargetParser/Triple.h" #include "llvm/TargetParser/X86TargetParser.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" 
+#include "llvm/Transforms/Utils/ModuleUtils.h" #include <optional> #include <set> @@ -3325,35 +3328,18 @@ void CodeGenModule::addUsedOrCompilerUsedGlobal(llvm::GlobalValue *GV) { LLVMUsed.emplace_back(GV); } -static void emitUsed(CodeGenModule &CGM, StringRef Name, - std::vector<llvm::WeakTrackingVH> &List) { - // Don't create llvm.used if there is no need. - if (List.empty()) - return; - - llvm::PointerType *UnqualPtr = - llvm::PointerType::getUnqual(CGM.getLLVMContext()); - - // Convert List to what ConstantArray needs. - SmallVector<llvm::Constant*, 8> UsedArray; - UsedArray.resize(List.size()); - for (unsigned i = 0, e = List.size(); i != e; ++i) { - UsedArray[i] = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - cast<llvm::Constant>(&*List[i]), UnqualPtr); - } - - llvm::ArrayType *ATy = llvm::ArrayType::get(UnqualPtr, UsedArray.size()); - - auto *GV = new llvm::GlobalVariable( - CGM.getModule(), ATy, false, llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, UsedArray), Name); +void CodeGenModule::emitLLVMUsed() { + auto CastToGlobal = [](llvm::WeakTrackingVH &VH) { + return cast<llvm::GlobalValue>(VH); + }; - GV->setSection("llvm.metadata"); -} + SmallVector<llvm::GlobalValue *> LLVMUsedGV(LLVMUsed.size()); + llvm::transform(LLVMUsed, LLVMUsedGV.begin(), CastToGlobal); + llvm::appendToUsed(getModule(), LLVMUsedGV); -void CodeGenModule::emitLLVMUsed() { - emitUsed(*this, "llvm.used", LLVMUsed); - emitUsed(*this, "llvm.compiler.used", LLVMCompilerUsed); + SmallVector<llvm::GlobalValue *> LLVMCompilerUsedGV(LLVMCompilerUsed.size()); + llvm::transform(LLVMCompilerUsed, LLVMCompilerUsedGV.begin(), CastToGlobal); + llvm::appendToCompilerUsed(getModule(), LLVMCompilerUsedGV); } void CodeGenModule::AppendLinkerOptions(StringRef Opts) { From 00d3bfb37bbcd1fbcda4220a1ce2884637e79a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Mon, 3 Nov 2025 16:24:27 +0100 Subject: [PATCH 11/12] 
[Review] header is not used anymore --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 069886c8bd6df..01d6fadfba5df 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -75,7 +75,6 @@ #include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> #include <cassert> #include <cstddef> From 019522026d7a14a773fa098367871df3e60ad93b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <[email protected]> Date: Mon, 3 Nov 2025 16:26:05 +0100 Subject: [PATCH 12/12] [Review] Since Clang doesn't emit elements in the wrong addrspace, we can now use unqual only --- llvm/lib/IR/Globals.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 609c463d95d40..506a115d2f76d 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -689,11 +689,10 @@ static void appendToUsedList(Module &M, StringRef Name, SmallSetVector<Constant *, 16> Init; collectUsedGlobals(GV, Init); - Type *ArrayEltTy = GV ? GV->getValueType()->getArrayElementType() - : PointerType::getUnqual(M.getContext()); if (GV) GV->eraseFromParent(); + Type *ArrayEltTy = PointerType::getUnqual(M.getContext()); for (auto *V : Values) Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
