https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/100954
None >From 26e3c81b1488d32620f840d741966648e6d6c884 Mon Sep 17 00:00:00 2001 From: Shilei Tian <i...@tianshilei.me> Date: Sun, 28 Jul 2024 19:24:31 -0400 Subject: [PATCH] [Attributor][AMDGPU] Improve the handling of indirect calls --- llvm/include/llvm/Transforms/IPO/Attributor.h | 9 +++++---- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 18 ++++++++++++++++++ llvm/lib/Transforms/IPO/Attributor.cpp | 2 +- .../Transforms/IPO/AttributorAttributes.cpp | 3 ++- .../AMDGPU/amdgpu-attributor-no-agpr.ll | 16 +++------------- 5 files changed, 29 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 34557238ecb23..596ee39c35a37 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1448,7 +1448,7 @@ struct AttributorConfig { /// Callback function to determine if an indirect call targets should be made /// direct call targets (with an if-cascade). std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB, - Function &AssummedCallee)> + Function &AssummedCallee, bool IsSingleton)> IndirectCalleeSpecializationCallback = nullptr; /// Helper to update an underlying call graph and to delete functions. @@ -1718,10 +1718,11 @@ struct Attributor { /// Return true if we should specialize the call site \b CB for the potential /// callee \p Fn. bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA, - CallBase &CB, Function &Callee) { + CallBase &CB, Function &Callee, + bool IsSingleton) { return Configuration.IndirectCalleeSpecializationCallback - ? Configuration.IndirectCalleeSpecializationCallback(*this, AA, - CB, Callee) + ? Configuration.IndirectCalleeSpecializationCallback( + *this, AA, CB, Callee, IsSingleton) : true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index ab98da31b050f..b8ab11a7b420b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -14,6 +14,7 @@ #include "GCNSubtarget.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/CycleAnalysis.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" @@ -1041,11 +1042,28 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, &AAPointerInfo::ID, &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID}); + /// Helper to decide if we should specialize the indirect \p CB for \p Callee. + /// \p IsSingleton indicates whether the \p Callee is the only assumed callee. + auto IndirectCalleeSpecializationCallback = + [&](Attributor &A, const AbstractAttribute &AA, CallBase &CB, + Function &Callee, bool IsSingleton) { + if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv())) + return false; + // Singleton functions should be specialized. + if (IsSingleton) + return true; + // Otherwise specialize uniform values. + const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller()); + return TTI.isAlwaysUniform(CB.getCalledOperand()); + }; + AttributorConfig AC(CGUpdater); AC.IsClosedWorldModule = HasWholeProgramVisibility; AC.Allowed = &Allowed; AC.IsModulePass = true; AC.DefaultInitializeLiveInternals = false; + AC.IndirectCalleeSpecializationCallback = + IndirectCalleeSpecializationCallback; AC.IPOAmendableCB = [](const Function &F) { return F.getCallingConv() == CallingConv::AMDGPU_KERNEL; }; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 910c0aeacc42e..879a26bcf328d 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -3836,7 +3836,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, if (MaxSpecializationPerCB.getNumOccurrences()) { AC.IndirectCalleeSpecializationCallback = [&](Attributor &, const AbstractAttribute &AA, CallBase &CB, - Function &Callee) { + Function &Callee, bool IsSingleton) { if (MaxSpecializationPerCB == 0) return false; auto &Set = IndirectCalleeTrackingMap[&CB]; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 2816a85743faa..3f02ea1cbd6cb 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12347,7 +12347,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo { SmallVector<Function *, 8> SkippedAssumedCallees; SmallVector<std::pair<CallInst *, Instruction *>> NewCalls; for (Function *NewCallee : AssumedCallees) { - if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) { + if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee, + AssumedCallees.size() == 1)) { SkippedAssumedCallees.push_back(NewCallee); SpecializedForAllCallees = false; continue; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll index e5d440b96349f..d89dae1933365 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll @@ -231,19 +231,7 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) { ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr( ; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty -; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] -; CHECK: 2: -; CHECK-NEXT: call void @also_empty() -; CHECK-NEXT: br label [[TMP6:%.*]] -; CHECK: 3: -; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]] -; CHECK: 4: -; CHECK-NEXT: call void @empty() -; CHECK-NEXT: br label [[TMP6]] -; CHECK: 5: -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: call void [[FPTR]](), !callees [[META0:![0-9]+]] ; CHECK-NEXT: ret void ; %fptr = select i1 %cond, ptr @empty, ptr @also_empty @@ -265,3 +253,5 @@ attributes #0 = { "amdgpu-no-agpr" } ; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } ; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" } ;. +; CHECK: [[META0]] = !{ptr @also_empty, ptr @empty} +;. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits