https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102884
>From 04fe70293a7fe2432eb55620c7fc5b05310c4f74 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 12 Aug 2024 15:26:25 +0400 Subject: [PATCH] AMDGPU/NewPM: Start filling out addIRPasses This is not complete, but gets AtomicExpand running. I was able to get further than I expected; we're quite close to having all the IR codegen passes ported. --- .../AMDGPU/AMDGPUCodeGenPassBuilder.cpp | 104 ++++++++++++++++++ .../Target/AMDGPU/AMDGPUCodeGenPassBuilder.h | 5 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 + 3 files changed, 110 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index 9fd7e24b114ddd..854e1644a71e98 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -8,14 +8,24 @@ #include "AMDGPUCodeGenPassBuilder.h" #include "AMDGPU.h" +#include "AMDGPUCtorDtorLowering.h" #include "AMDGPUISelDAGToDAG.h" #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUTargetMachine.h" #include "AMDGPUUnifyDivergentExitNodes.h" #include "SIFixSGPRCopies.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/ExpandVariadics.h" +#include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/FlattenCFG.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/InferAddressSpaces.h" +#include "llvm/Transforms/Scalar/LoopDataPrefetch.h" +#include "llvm/Transforms/Scalar/NaryReassociate.h" +#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h" #include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" #include "llvm/Transforms/Scalar/StructurizeCFG.h" #include "llvm/Transforms/Utils/FixIrreducible.h" #include "llvm/Transforms/Utils/LCSSA.h" @@ -38,6 +48,70 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( ShadowStackGCLoweringPass>(); } +void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { + // TODO: Missing AMDGPURemoveIncompatibleFunctions + + addPass(AMDGPUPrintfRuntimeBindingPass()); + if (LowerCtorDtor) + addPass(AMDGPUCtorDtorLoweringPass()); + + if (isPassEnabled(EnableImageIntrinsicOptimizer)) + addPass(AMDGPUImageIntrinsicOptimizerPass(TM)); + + // This can be disabled by passing ::Disable here or on the command line + // with --expand-variadics-override=disable. + addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering)); + + addPass(AMDGPUAlwaysInlinePass()); + addPass(AlwaysInlinerPass()); + + // TODO: Missing OpenCLEnqueuedBlockLowering + + // Runs before PromoteAlloca so the latter can account for function uses + if (EnableLowerModuleLDS) + addPass(AMDGPULowerModuleLDSPass(TM)); + + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(InferAddressSpacesPass()); + + // Run atomic optimizer before Atomic Expand + if (TM.getOptLevel() >= CodeGenOptLevel::Less && + (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) + addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy)); + + addPass(AtomicExpandPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::None) { + addPass(AMDGPUPromoteAllocaPass(TM)); + if (isPassEnabled(EnableScalarIRPasses)) + addStraightLineScalarOptimizationPasses(addPass); + + // TODO: Handle EnableAMDGPUAliasAnalysis + + // TODO: May want to move later or split into an early and late one. + addPass(AMDGPUCodeGenPreparePass(TM)); + + // TODO: LICM + } + + Base::addIRPasses(addPass); + + // EarlyCSE is not always strong enough to clean up what LSR produces. For + // example, GVN can combine + // + // %0 = add %a, %b + // %1 = add %b, %a + // + // and + // + // %0 = shl nsw %a, 2 + // %1 = shl %a, 2 + // + // but EarlyCSE can do neither of them. + if (isPassEnabled(EnableScalarIRPasses)) + addEarlyCSEOrGVNPass(addPass); +} + void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const { // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be // deleted soon. @@ -136,6 +210,36 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const { return Error::success(); } +void AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const { + if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) + addPass(GVNPass()); + else + addPass(EarlyCSEPass()); +} + +void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses( + AddIRPass &addPass) const { + if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive)) + addPass(LoopDataPrefetchPass()); + + addPass(SeparateConstOffsetFromGEPPass()); + + // ReassociateGEPs exposes more opportunities for SLSR. See + // the example in reassociate-geps-and-slsr.ll. + addPass(StraightLineStrengthReducePass()); + + // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or + // EarlyCSE can reuse. + addEarlyCSEOrGVNPass(addPass); + + // Run NaryReassociate after EarlyCSE/GVN to be more effective. + addPass(NaryReassociatePass()); + + // NaryReassociate on GEPs creates redundant common expressions, so run + // EarlyCSE after it. + addPass(EarlyCSEPass()); +} + bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt, CodeGenOptLevel Level) const { if (Opt.getNumOccurrences()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h index 1ff7744c84a436..c71566316993dd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h @@ -24,11 +24,16 @@ class AMDGPUCodeGenPassBuilder AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC); + + void addIRPasses(AddIRPass &) const; void addCodeGenPrepare(AddIRPass &) const; void addPreISel(AddIRPass &addPass) const; void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const; Error addInstSelector(AddMachinePass &) const; + void addEarlyCSEOrGVNPass(AddIRPass &) const; + void addStraightLineScalarOptimizationPasses(AddIRPass &) const; + /// Check if a pass is enabled given \p Opt option. The option always /// overrides defaults if explicitly used. Otherwise its default will /// be used given that a pass shall work at an optimization \p Level diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index fdb9cf02988195..172101244a34fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -66,6 +66,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" +#include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits