llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: Akshat Oke (optimisan) <details> <summary>Changes</summary> --- Patch is 24.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138495.diff 12 Files Affected: - (added) llvm/include/llvm/CodeGen/InitUndef.h (+24) - (modified) llvm/include/llvm/InitializePasses.h (+1-1) - (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+1) - (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1-1) - (modified) llvm/lib/CodeGen/CodeGen.cpp (+1-1) - (modified) llvm/lib/CodeGen/InitUndef.cpp (+35-15) - (modified) llvm/lib/Passes/PassBuilder.cpp (+1) - (modified) llvm/test/CodeGen/AArch64/init-undef.mir (+3) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+2-2) - (modified) llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir (+2) - (modified) llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir (+1) - (modified) llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir (+1) ``````````diff diff --git a/llvm/include/llvm/CodeGen/InitUndef.h b/llvm/include/llvm/CodeGen/InitUndef.h new file mode 100644 index 0000000000000..be1cf4bfc9872 --- /dev/null +++ b/llvm/include/llvm/CodeGen/InitUndef.h @@ -0,0 +1,24 @@ +//===- llvm/CodeGen/InitUndef.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_INITUNDEF_H +#define LLVM_CODEGEN_INITUNDEF_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class InitUndefPass : public PassInfoMixin<InitUndefPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_INITUNDEF_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index bff0526d4177a..07dc86c6fccf2 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -311,7 +311,7 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry &); void initializeTwoAddressInstructionLegacyPassPass(PassRegistry &); void initializeTypeBasedAAWrapperPassPass(PassRegistry &); void initializeTypePromotionLegacyPass(PassRegistry &); -void initializeInitUndefPass(PassRegistry &); +void initializeInitUndefLegacyPass(PassRegistry &); void initializeUniformityInfoWrapperPassPass(PassRegistry &); void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &); void initializeUnpackMachineBundlesPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 982bb16e71eab..351ef63af05c0 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -43,6 +43,7 @@ #include "llvm/CodeGen/GlobalMerge.h" #include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/CodeGen/IndirectBrExpand.h" +#include "llvm/CodeGen/InitUndef.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" #include "llvm/CodeGen/JMCInstrumenter.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index c69573ee3ed97..436b26852ce90 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -148,6 +148,7 @@ MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass()) +MACHINE_FUNCTION_PASS("init-undef", InitUndefPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass()) MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass()) @@ -304,7 +305,6 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass) DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass) DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass) -DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass) DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass) DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass) DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 5250534d8a4e4..aa3591cb6be58 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -54,7 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeIfConverterPass(Registry); initializeImplicitNullChecksPass(Registry); initializeIndirectBrExpandLegacyPassPass(Registry); - initializeInitUndefPass(Registry); + initializeInitUndefLegacyPass(Registry); initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); initializeJMCInstrumenterPass(Registry); diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index 6c0e9f9e930b9..500a73be7c0f5 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -38,6 +38,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/InitUndef.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DetectDeadLanes.h" @@ -59,20 +60,12 @@ using namespace llvm; namespace { -class InitUndef : public MachineFunctionPass { - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - const TargetSubtargetInfo *ST; - const TargetRegisterInfo *TRI; - - // Newly added vregs, assumed to be fully rewritten - SmallSet<Register, 8> NewRegs; - SmallVector<MachineInstr *, 8> DeadInsts; - +class InitUndefLegacy : public MachineFunctionPass { public: static char ID; - InitUndef() : MachineFunctionPass(ID) {} + InitUndefLegacy() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -81,6 +74,20 @@ class InitUndef : public MachineFunctionPass { } StringRef getPassName() const override { return INIT_UNDEF_NAME; } +}; + +class InitUndef { + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + const TargetSubtargetInfo *ST; + const TargetRegisterInfo *TRI; + + // Newly added vregs, assumed to be fully rewritten + SmallSet<Register, 8> NewRegs; + SmallVector<MachineInstr *, 8> DeadInsts; + +public: + bool run(MachineFunction &MF); private: bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, @@ -93,9 +100,9 @@ class InitUndef : public MachineFunctionPass { } // end anonymous namespace -char InitUndef::ID = 0; -INITIALIZE_PASS(InitUndef, DEBUG_TYPE, INIT_UNDEF_NAME, false, false) -char &llvm::InitUndefID = InitUndef::ID; +char InitUndefLegacy::ID = 0; +INITIALIZE_PASS(InitUndefLegacy, DEBUG_TYPE, INIT_UNDEF_NAME, false, false) +char &llvm::InitUndefID = InitUndefLegacy::ID; static bool isEarlyClobberMI(MachineInstr &MI) { return llvm::any_of(MI.all_defs(), [](const MachineOperand &DefMO) { @@ -246,7 +253,20 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, return Changed; } -bool InitUndef::runOnMachineFunction(MachineFunction &MF) { +bool InitUndefLegacy::runOnMachineFunction(MachineFunction &MF) { + return InitUndef().run(MF); +} + +PreservedAnalyses InitUndefPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + if (!InitUndef().run(MF)) + return PreservedAnalyses::all(); + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +bool InitUndef::run(MachineFunction &MF) { ST = &MF.getSubtarget(); // The pass is only needed if early-clobber defs and undef ops cannot be diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7740f622ede7c..5f7ce13ad8a3e 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -103,6 +103,7 @@ #include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/CodeGen/HardwareLoops.h" #include "llvm/CodeGen/IndirectBrExpand.h" +#include "llvm/CodeGen/InitUndef.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" #include "llvm/CodeGen/JMCInstrumenter.h" diff --git a/llvm/test/CodeGen/AArch64/init-undef.mir b/llvm/test/CodeGen/AArch64/init-undef.mir index c9d23006d3523..cee41c053251d 100644 --- a/llvm/test/CodeGen/AArch64/init-undef.mir +++ b/llvm/test/CodeGen/AArch64/init-undef.mir @@ -2,6 +2,9 @@ # RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=false -run-pass=init-undef -o - %s | FileCheck %s # RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=true -run-pass=init-undef -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=false -passes=init-undef -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=true -passes=init-undef -o - %s | FileCheck %s + --- name: test_stxp_undef body: | diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 91c15565762de..e00b7ff83e322 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -9,9 +9,9 @@ ; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) -; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) +; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) -; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) +; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expa... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/138495 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits