https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/120557
>From c6fa1b8713b753bad7f62933aa6b2cff73fbdd47 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Tue, 11 Feb 2025 12:36:40 +0000 Subject: [PATCH] [CodeGen][NewPM] Plug greedy RA in codegen pipeline --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 51 ++++++++++++++----- .../llvm/Passes/MachinePassRegistry.def | 4 +- .../include/llvm/Target/CGPassBuilderOption.h | 4 +- llvm/lib/Passes/PassBuilder.cpp | 14 +++++ ...plicit-def-remat-requires-impdef-check.mir | 1 + ...implicit-def-with-impdef-greedy-assert.mir | 1 + llvm/test/CodeGen/AArch64/pr51516.mir | 1 + llvm/test/CodeGen/AArch64/spill-fold.mir | 2 + llvm/test/CodeGen/MIR/Generic/runPass.mir | 1 + .../SystemZ/clear-liverange-spillreg.mir | 1 + llvm/test/CodeGen/Thumb/high-reg-clobber.mir | 1 + llvm/test/CodeGen/X86/limit-split-cost.mir | 1 + llvm/tools/llc/NewPMDriver.cpp | 15 ++++-- 13 files changed, 75 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index a1e81f4b7375a1..31120b0b43485a 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -1061,7 +1061,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization( /// /// A target that uses the standard regalloc pass order for fast or optimized /// allocation may still override this for per-target regalloc -/// selection. But -regalloc=... always takes precedence. +/// selection. But -regalloc-npm=... always takes precedence. +/// If a target does not want to allow users to set -regalloc-npm=... at all, +/// check if Opt.RegAlloc == RegAllocType::Unset. 
template <typename Derived, typename TargetMachineT> void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator( AddMachinePass &addPass, bool Optimized) const { @@ -1074,10 +1076,29 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator( /// Find and instantiate the register allocation pass requested by this target /// at the current optimization level. Different register allocators are /// defined as separate passes because they may require different analysis. +/// +/// This helper ensures that the -regalloc-npm= option is always available, +/// even for targets that override the default allocator. template <typename Derived, typename TargetMachineT> void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass( AddMachinePass &addPass, bool Optimized) const { - // TODO: Parse Opt.RegAlloc to add register allocator. + // Use the specified -regalloc-npm={basic|greedy|fast|pbqp} + if (Opt.RegAlloc > RegAllocType::Default) { + switch (Opt.RegAlloc) { + case RegAllocType::Fast: + addPass(RegAllocFastPass()); + break; + case RegAllocType::Greedy: + addPass(RAGreedyPass()); + break; + default: + report_fatal_error("register allocator not supported yet.", false); + } + return; + } + // -regalloc-npm=default or unspecified, so pick based on the optimization level + // or ask the target for the regalloc pass. + derived().addTargetRegisterAllocator(addPass, Optimized); } template <typename Derived, typename TargetMachineT> @@ -1148,20 +1169,22 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc( // PreRA instruction scheduling. addPass(MachineSchedulerPass()); - if (derived().addRegAssignmentOptimized(addPass)) { - // Allow targets to expand pseudo instructions depending on the choice of - // registers before MachineCopyPropagation. - derived().addPostRewrite(addPass); + if (auto E = derived().addRegAssignmentOptimized(addPass)) { + // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing. 
+ return; + } + // Allow targets to expand pseudo instructions depending on the choice of + // registers before MachineCopyPropagation. + derived().addPostRewrite(addPass); - // Copy propagate to forward register uses and try to eliminate COPYs that - // were not coalesced. - addPass(MachineCopyPropagationPass()); + // Copy propagate to forward register uses and try to eliminate COPYs that + // were not coalesced. + addPass(MachineCopyPropagationPass()); - // Run post-ra machine LICM to hoist reloads / remats. - // - // FIXME: can this move into MachineLateOptimization? - addPass(MachineLICMPass()); - } + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(MachineLICMPass()); } //===---------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 9e2d9e071e85de..3656e5db9fb838 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -193,12 +193,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( }, "filter=reg-filter;no-clear-vregs") +// 'all' is the default filter MACHINE_FUNCTION_PASS_WITH_PARAMS( "greedy", "RAGreedyPass", [](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); }, [PB = this](StringRef Params) { - // TODO: parseRegAllocGreedyFilterFunc(*PB, Params); - return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{}); + return parseRegAllocGreedyFilterFunc(*PB, Params); }, "reg-filter" ) #undef MACHINE_FUNCTION_PASS_WITH_PARAMS diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index d3d19c8a7dc9f2..b496a9f66296f2 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -20,7 +20,7 @@ namespace llvm { enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline }; -enum 
class RegAllocType { Default, Basic, Fast, Greedy, PBQP }; +enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP }; // Not one-on-one but mostly corresponding to commandline options in // TargetPassConfig.cpp. @@ -52,7 +52,7 @@ struct CGPassBuilderOption { bool RequiresCodeGenSCCOrder = false; RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault; - StringRef RegAlloc = "default"; + RegAllocType RegAlloc = RegAllocType::Unset; std::optional<GlobalISelAbortMode> EnableGlobalISelAbort; std::string FSProfileFile; std::string FSRemappingFile; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 86e669864f4718..1042297e7a705f 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1412,6 +1412,20 @@ parseBoundsCheckingOptions(StringRef Params) { return Options; } +Expected<RAGreedyPass::Options> +parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) { + if (Params.empty() || Params == "all") { + return RAGreedyPass::Options(); + } + std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params); + if (!Filter) { + return make_error<StringError>( + formatv("invalid regallocgreedy register filter '{0}' ", Params).str(), + inconvertibleErrorCode()); + } + return RAGreedyPass::Options{*Filter, Params}; +} + } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline diff --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir index 47aa34e3c01156..e8d0c43e814333 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir +++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 # RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy 
-o - %s | FileCheck %s +# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=greedy -o - %s | FileCheck %s --- | define void @inst_stores_to_dead_spill_implicit_def_impdef() { diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir index a5d74ef75f0a0a..a1caa46e8b2bbe 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir +++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 # RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=arm64-apple-ios -passes=greedy -o - %s | FileCheck %s --- name: widget diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir index 910bfb858b50f6..ae54ad0d5cef44 100644 --- a/llvm/test/CodeGen/AArch64/pr51516.mir +++ b/llvm/test/CodeGen/AArch64/pr51516.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=greedy -verify-machineinstrs -o - %s | FileCheck %s # Check that we spill %31 and do not rematerialize it since the use operand # of ADDXri is killed by the STRXui in this block. 
diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir index b1e7ebe3a7e82b..0149e4504bed2d 100644 --- a/llvm/test/CodeGen/AArch64/spill-fold.mir +++ b/llvm/test/CodeGen/AArch64/spill-fold.mir @@ -1,5 +1,7 @@ # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s # RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=greedy -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=greedy -o - %s | FileCheck %s --- | define i64 @test_subreg_spill_fold() { ret i64 0 } define i64 @test_subreg_spill_fold2() { ret i64 0 } diff --git a/llvm/test/CodeGen/MIR/Generic/runPass.mir b/llvm/test/CodeGen/MIR/Generic/runPass.mir index 75763c5389b09e..824d9ed0e65f84 100644 --- a/llvm/test/CodeGen/MIR/Generic/runPass.mir +++ b/llvm/test/CodeGen/MIR/Generic/runPass.mir @@ -2,6 +2,7 @@ # RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s # RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s # RUN: llc -passes=regallocfast -o - %s | FileCheck %s +# RUN: llc -passes=greedy -o - %s | FileCheck %s # Check that passes are initialized correctly, so that it's possible to # use -run-pass. diff --git a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir index 197c3d8551fc38..a9aecb76edcf6d 100644 --- a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir +++ b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir @@ -1,4 +1,5 @@ #RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy +#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=greedy #PR34502. Check HoistSpill works properly after the live range of spilled #virtual register is cleared. 
--- | diff --git a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir index 1402c7c2cbca36..6897aa7f34f94a 100644 --- a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir +++ b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir @@ -3,6 +3,7 @@ # RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s # RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST # RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST +# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=greedy %s -o - | FileCheck %s ... --- diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir index 7ec0404e0f737c..5b8bb98389c028 100644 --- a/llvm/test/CodeGen/X86/limit-split-cost.mir +++ b/llvm/test/CodeGen/X86/limit-split-cost.mir @@ -1,5 +1,6 @@ # REQUIRES: asserts # RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s +# RUN: llc -mtriple=x86_64-- -passes=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s # Check no global region split is needed because the live range to split is trivially rematerializable. 
# CHECK-NOT: Compact region bundles --- | diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp index 3892fbb8c74f78..0f7aa6284962a2 100644 --- a/llvm/tools/llc/NewPMDriver.cpp +++ b/llvm/tools/llc/NewPMDriver.cpp @@ -48,10 +48,17 @@ using namespace llvm; -static cl::opt<std::string> - RegAlloc("regalloc-npm", - cl::desc("Register allocator to use for new pass manager"), - cl::Hidden, cl::init("default")); +static cl::opt<RegAllocType> RegAlloc( + "regalloc-npm", cl::desc("Register allocator to use for new pass manager"), + cl::Hidden, cl::init(RegAllocType::Unset), + cl::values( + clEnumValN(RegAllocType::Default, "default", + "Default register allocator"), + clEnumValN(RegAllocType::PBQP, "pbqp", "PBQP register allocator"), + clEnumValN(RegAllocType::Fast, "fast", "Fast register allocator"), + clEnumValN(RegAllocType::Basic, "basic", "Basic register allocator"), + clEnumValN(RegAllocType::Greedy, "greedy", + "Greedy register allocator"))); static cl::opt<bool> DebugPM("debug-pass-manager", cl::Hidden, _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits