llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Jon Chesterfield (JonChesterfield) <details> <summary>Changes</summary> Implement an attribute in the spirit of always_inline, for giving programmers a hook to have LLVM specialise subtrees of their program with respect to constant variables. For example, specialise a sort function on the comparator, or a data structure on a struct of function pointers serving as a vtable. The attribute solves the cost model puzzle by leaving the decision to the programmer, exactly as always_inline does. An IR transform means bitcode libraries with the attribute in place are specialised with respect to applications, without needing to hoist the code in question into C++ templates. The implementation is straightforward. It might grow some additional on-the-fly simplifications (maybe icmp), and at most one command-line argument to specify the extent to which specialisations can request further specialisations, but generally I don't expect it to grow much over time. I'm hopeful that this can be used to nudge some code away from writing always_inline everywhere. It's a feature some other languages have that I miss when working in IR.
--- Patch is 63.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143983.diff 34 Files Affected: - (modified) clang/include/clang/Basic/Attr.td (+8) - (modified) clang/include/clang/Basic/AttrDocs.td (+11) - (modified) clang/lib/CodeGen/CodeGenModule.cpp (+9) - (modified) clang/lib/Sema/SemaDeclAttr.cpp (+3) - (modified) clang/test/CodeGen/lto-newpm-pipeline.c (+2) - (modified) clang/test/Misc/pragma-attribute-supported-attributes-list.test (+1) - (modified) llvm/include/llvm/Bitcode/LLVMBitCodes.h (+1) - (modified) llvm/include/llvm/IR/Attributes.td (+3) - (modified) llvm/include/llvm/InitializePasses.h (+1) - (added) llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h (+29) - (modified) llvm/lib/Bitcode/Reader/BitcodeReader.cpp (+2) - (modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+2) - (modified) llvm/lib/Passes/PassBuilder.cpp (+1) - (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+3) - (modified) llvm/lib/Passes/PassRegistry.def (+1) - (added) llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp (+324) - (modified) llvm/lib/Transforms/IPO/CMakeLists.txt (+1) - (modified) llvm/lib/Transforms/IPO/SCCP.cpp (-1) - (modified) llvm/lib/Transforms/Utils/CodeExtractor.cpp (+1) - (modified) llvm/test/Other/new-pm-O0-defaults.ll (+2) - (modified) llvm/test/Other/new-pm-defaults.ll (+1) - (modified) llvm/test/Other/new-pm-thinlto-postlink-defaults.ll (+1) - (modified) llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll (+1) - (modified) llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll (+1) - (modified) llvm/test/Other/new-pm-thinlto-prelink-defaults.ll (+1) - (modified) llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll (+1) - (modified) llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll (+1) - (added) llvm/test/Transforms/FunctionSpecialization/always-specialize-diamond.ll (+89) - (added) llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-factorial.ll (+155) - (added) 
llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive-indirect.ll (+294) - (added) llvm/test/Transforms/FunctionSpecialization/always-specialize-recursive.ll (+44) - (added) llvm/test/Transforms/FunctionSpecialization/always-specialize-simple.ll (+229) - (added) llvm/test/Transforms/FunctionSpecialization/always-specialize-variadic.ll (+55) - (modified) llvm/tools/opt/optdriver.cpp (+1) ``````````diff diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index f113cd2ba2fbf..daef074e9dc72 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -905,6 +905,14 @@ def AlwaysInline : DeclOrStmtAttr { let Documentation = [AlwaysInlineDocs]; } +def AlwaysSpecialize : InheritableParamAttr { + let Spellings = [GNU<"always_specialize">, CXX11<"clang", "always_specialize">, + C23<"clang", "always_specialize">]; + let Subjects = SubjectList<[ParmVar]>; + let Documentation = [AlwaysSpecializeDocs]; + let SimpleHandler = 1; +} + def Artificial : InheritableAttr { let Spellings = [GCC<"artificial">]; let Subjects = SubjectList<[InlineFunction]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 047f51ffa59ed..64129a3107218 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -8126,6 +8126,17 @@ Attribute docs`_, and `the GCC Inline docs`_. let Heading = "always_inline, __force_inline"; } +def AlwaysSpecializeDocs : Documentation { + let Category = DocCatConsumed; + let Content = [{ + The ``always_specialize`` attribute on a function parameter indicates that + the function shall be duplicated and specialized with respect to constant + arguments. This will usually increase code size. It controls an IR transform + similar in spirit to ``always_inline``. 
+ }]; + let Heading = "always_specialize"; +} + def EnforceTCBDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 16e49aab4fe61..4ba32986146ef 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2559,6 +2559,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, B.addAttribute("aarch64_new_zt0"); } + if (D->hasAttr<AlwaysSpecializeAttr>()) + B.addAttribute(llvm::Attribute::AlwaysSpecialize); + // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. bool ShouldAddOptNone = @@ -2978,6 +2981,12 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, F->addParamAttr(0, llvm::Attribute::Returned); } + for (auto [Index, Param] : enumerate(FD->parameters())) + if (Param->hasAttrs()) + for (auto *A : Param->getAttrs()) + if (A->getKind() == attr::AlwaysSpecialize) + F->addParamAttr(Index, llvm::Attribute::AlwaysSpecialize); + // Only a few attributes are set on declarations; these may later be // overridden by a definition. 
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1aeae41042a1c..c32f147737883 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7137,6 +7137,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_AlwaysInline: handleAlwaysInlineAttr(S, D, AL); break; + case ParsedAttr::AT_AlwaysSpecialize: + handleSimpleAttribute<AlwaysSpecializeAttr>(S, D, AL); + break; case ParsedAttr::AT_AnalyzerNoReturn: handleAnalyzerNoReturnAttr(S, D, AL); break; diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c index ea9784a76f923..7f83bd38e7803 100644 --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -31,6 +31,7 @@ // CHECK-FULL-O0-NEXT: Running pass: EntryExitInstrumenterPass // CHECK-FULL-O0-NEXT: Running pass: AlwaysInlinerPass // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis +// CHECK-FULL-O0-NEXT: Running pass: AlwaysSpecializerPass // CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass @@ -45,6 +46,7 @@ // CHECK-THIN-O0-NEXT: Running pass: EntryExitInstrumenterPass // CHECK-THIN-O0-NEXT: Running pass: AlwaysInlinerPass // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis +// CHECK-THIN-O0-NEXT: Running pass: AlwaysSpecializerPass // CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 41d00dae3f69a..cd90e06609c55 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -15,6 
+15,7 @@ // CHECK-NEXT: AlignValue (SubjectMatchRule_variable, SubjectMatchRule_type_alias) // CHECK-NEXT: AlwaysDestroy (SubjectMatchRule_variable) // CHECK-NEXT: AlwaysInline (SubjectMatchRule_function) +// CHECK-NEXT: AlwaysSpecialize (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: Annotate () // CHECK-NEXT: AnyX86NoCfCheck (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: ArcWeakrefUnavailable (SubjectMatchRule_objc_interface) diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index b362a88963f6c..476bb4167dea8 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -798,6 +798,7 @@ enum AttributeKindCodes { ATTR_KIND_NO_DIVERGENCE_SOURCE = 100, ATTR_KIND_SANITIZE_TYPE = 101, ATTR_KIND_CAPTURES = 102, + ATTR_KIND_ALWAYS_SPECIALIZE = 103, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index d488c5f419b82..fb4c7366d9491 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -98,6 +98,9 @@ def AllocSize : IntAttr<"allocsize", IntersectPreserve, [FnAttr]>; /// inline=always. def AlwaysInline : EnumAttr<"alwaysinline", IntersectPreserve, [FnAttr]>; +/// Specialize function when argument at call site is known constant +def AlwaysSpecialize : EnumAttr<"alwaysspecialize", IntersectPreserve, [ParamAttr]>; + /// Callee is recognized as a builtin, despite nobuiltin attribute on its /// declaration. 
def Builtin : EnumAttr<"builtin", IntersectPreserve, [FnAttr]>; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 42610d505c2bd..4c3f0ea08ed43 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -50,6 +50,7 @@ void initializeTarget(PassRegistry &); void initializeAAResultsWrapperPassPass(PassRegistry &); void initializeAlwaysInlinerLegacyPassPass(PassRegistry &); +void initializeAlwaysSpecializerPass(PassRegistry &); void initializeAssignmentTrackingAnalysisPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry &); void initializeAtomicExpandLegacyPass(PassRegistry &); diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h b/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h new file mode 100644 index 0000000000000..020d8eec3e760 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/AlwaysSpecializer.h @@ -0,0 +1,29 @@ +//=== AlwaysSpecializer.h - implementation of always_specialize -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H +#define LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; +class ModulePass; + +class AlwaysSpecializerPass : public PassInfoMixin<AlwaysSpecializerPass> { +public: + AlwaysSpecializerPass(); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } +}; + +ModulePass *createAlwaysSpecializerPass(); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_ALWAYSSPECIALIZER_H diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index fde934fbb3cf1..5cb348e1a330e 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2054,6 +2054,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; + case bitc::ATTR_KIND_ALWAYS_SPECIALIZE: + return Attribute::AlwaysSpecialize; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 628b939af19ce..f3afc91176723 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -750,6 +750,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_ALLOC_SIZE; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; + case Attribute::AlwaysSpecialize: + return bitc::ATTR_KIND_ALWAYS_SPECIALIZE; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4603eaff8ade9..63ad02bcc522c 100644 --- 
a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -195,6 +195,7 @@ #include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index a99146d5eaa34..a14ffddeb164b 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -42,6 +42,7 @@ #include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/HipStdPar/HipStdPar.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" @@ -1277,6 +1278,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType)); MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true)); + MPM.addPass(AlwaysSpecializerPass()); if (EnableModuleInliner) MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); @@ -2252,6 +2254,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, // code generation. 
MPM.addPass(AlwaysInlinerPass( /*InsertLifetimeIntrinsics=*/false)); + MPM.addPass(AlwaysSpecializerPass()); if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index f761d0dab09a8..b65981652e258 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -50,6 +50,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #define MODULE_PASS(NAME, CREATE_PASS) #endif MODULE_PASS("always-inline", AlwaysInlinerPass()) +MODULE_PASS("always-specialize", AlwaysSpecializerPass()) MODULE_PASS("annotation2metadata", Annotation2MetadataPass()) MODULE_PASS("assign-guid", AssignGUIDPass()) MODULE_PASS("attributor", AttributorPass()) diff --git a/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp b/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp new file mode 100644 index 0000000000000..9e0bbe883bd10 --- /dev/null +++ b/llvm/lib/Transforms/IPO/AlwaysSpecializer.cpp @@ -0,0 +1,324 @@ +//===- AlwaysSpecializer.cpp - implementation of always_specialize --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Function specialisation under programmer control. +// +// Specifically, function parameters are marked [[always_specialize]], then call +// sites which pass a constant argument are rewritten to call specialisations. +// +// The difficult parts of function specialisation are the cost model, ensuring +// termination and specialisation to the anticipated extent. +// +// Cost model is under programmer control, exactly like always_inline. +// +// Termination follows from the implementation following a phased structure: +// 1. Functions are identifed in the input IR +// 2. 
Calls that exist in the input IR are identified +// Those constitute the complete set of specialisations that will be created. +// +// This pass does the _minimum_ specialisation, in the sense that only call +// sites in the input will lead to cloning. A specialised function will call +// another specialised function iff there was a call site with the same +// argument vector in the input. +// +// Running the identifyCalls + createClones sequence N times will behave +// as expected, specialising recursively to that depth. This patch has N=1 +// in the first instance, with no commandline argument to override. +// Similarly variadic functions are not yet handled. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/AlwaysSpecializer.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO/FunctionSpecialization.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +#define DEBUG_TYPE "always-specialize" + +namespace { + +class AlwaysSpecializer : public ModulePass { +public: + static char ID; + + AlwaysSpecializer() : ModulePass(ID) {} + StringRef getPassName() const override { return "Always specializer"; } + + // One constant for each argument, nullptr if that one is non-constant + using ArgVector = SmallVector<Constant *, 4>; + + // A map from the ArgVector to the matching specialisation + using FunctionSpecializations = MapVector<ArgVector, Function *>; + + // The four mini-passes populate and then use a map: + // 1. identifyFunctions writes all keys, with default initialised values. + // 2. identifyCalls writes all the ArgVector keys in the values of SpecList. + // 3. createClones writes the Function* values at the leaves. + // 4. replaceCalls walks the map doing the trivial rewrite. 
+ + // Conceptually a Map<Function*, Specialization> but a vector suffices. + using SpecListTy = + SmallVector<std::pair<Function *, FunctionSpecializations>, 4>; + + SpecListTy identifyFunctions(Module &M); + bool identifyCalls(Module &M, Function *F, FunctionSpecializations &); + bool createClones(Module &M, Function *F, FunctionSpecializations &); + bool replaceCalls(Module &M, Function *F, FunctionSpecializations &); + + bool runOnModule(Module &M) override { + bool Changed = false; + + // Sets all the keys in the structure used in this invocation. + SpecListTy SpecList = identifyFunctions(M); + size_t Count = SpecList.size(); + if (Count == 0) { + return false; + } + + // Record distinct call sites as vector<Constant*> -> nullptr + for (auto &[F, spec] : SpecList) + Changed |= identifyCalls(M, F, spec); + + // Create and record the clones. Note that call sites within the clones + // cannot trigger creating more clones so no termination risk. + for (auto &[F, spec] : SpecList) + Changed |= createClones(M, F, spec); + + // Replacing calls as the final phase means no need to track + // partially-specialised calls and no creating further clones. + for (auto &[F, spec] : SpecList) + Changed |= replaceCalls(M, F, spec); + + return Changed; + } + + static bool isCandidateFunction(const Function &F); + static bool callEligible(const Function &F, const CallBase *CB, + ArgVector &Out); + static Function *cloneCandidateFunction(Module &M, Function *F, + const ArgVector &C); + + // Only a member variable to reuse the allocation. Short lived. 
+ ArgVector ArgVec; +}; + +AlwaysSpecializer::SpecListTy AlwaysSpecializer::identifyFunctions(Module &M) { + SpecListTy SpecList; + for (Function &F : M) { + if (isCandidateFunction(F)) { + SpecList.push_back(std::make_pair(&F, FunctionSpecializations())); + } + } + return SpecList; +} + +bool AlwaysSpecializer::identifyCalls(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Found = false; + + for (User *U : F->users()) { + CallBase *CB = dyn_cast<CallBase>(U); + if (!CB || !callEligible(*F, CB, ArgVec)) { + continue; + } + + if (!Specs.contains(ArgVec)) { + Found = true; + Specs.insert(std::make_pair(ArgVec, nullptr)); + } + } + + return Found; +} + +bool AlwaysSpecializer::createClones(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Changed = false; + + for (auto It = Specs.begin(); It != Specs.end(); ++It) { + if (It->second) + continue; + Function *Clone = cloneCandidateFunction(M, F, It->first); + if (Clone) { + Changed = true; + It->second = Clone; + } + } + + return Changed; +} + +bool AlwaysSpecializer::replaceCalls(Module &M, Function *F, + FunctionSpecializations &Specs) { + bool Changed = false; + + for (User *u : make_early_inc_range(F->users())) { + CallBase *CB = dyn_cast<CallBase>(u); + if (!CB || !callEligible(*F, CB, ArgVec)) { + continue; + } + + Function *Clone = Specs[ArgVec]; + if (Clone) { + Changed = true; + CB->setCalledFunction(Clone); + } + } + + return Changed; +} + +bool AlwaysSpecializer::isCandidateFunction(const Function &F) { + + // Test if the function itself can't be specialised + if (!F.hasExactDefinition() || F.isIntrinsic() || + F.hasFnAttribute(Attribute::Naked)) + return false; + + // Variadics are left for a follow up patch + if (F.isVarArg()) + return false; + + // Need calls to the function for it to be worth considering + if (F.use_empty()) + return false; + + // Look for the attribute on a non-dead, non-indirect parameter + for (const Argument &Arg : F.args()) { + if 
(Arg.hasPointeeInMemoryValueAttr()) + continue; + + if (F.hasParamAttribute(Arg.getArgNo(), Attribute::AlwaysSpecialize)) + if (!Arg.use_empty()) + return true; + } + + return false; +} + +bool AlwaysSpecializer::callEligible(const Function &F, const CallBase *CB, + ArgVector &Out) { + const size_t Arity = F.arg_size(); + bool Eligible = false; + + if (CB->getCalledOperand() != &F) { + return false; + } + + if (CB->getFunctionType() != F.getFunctionType()) { + return false; + } + + if (CB->arg_size() != Arity) { + return false; + } + + Out.clear(); + for (size_t I = 0; I < Arity; I++) { + Constant *Arg = dyn_cast<Constant>(CB->getArgOperand(I)); + if (Arg && F.hasParamAttribute(I, Attribute::AlwaysSpecialize)) { + Eligible = true; + Out.push_back(Arg); + } else { + Out.push_back(nullptr); + } + } + + return Eligible; +} + +Function *AlwaysSpecializer::cloneCandidateFunction(Module &M, Function *F, + const ArgVector &C) { + + Function *Clone = + Function::Create(F->getFunctionType(), F->getLinkage(), + F->getAddressSpace(), F->getName() + ".spec"); + + // Roughly CloneFunction but inserting specialisations next to the original. + ValueToValueMapTy VMap; + Function::arg_iterator DestI = Clone->arg_begin(); + for (const Argument &I : F->args()) { + DestI->setName(I.getName()); + ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/143983 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits