https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/138495
>From 2db3af07bf3894df69e0336e2c71c4704fd4fca8 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Mon, 5 May 2025 08:47:42 +0000 Subject: [PATCH 1/2] [CodeGen][NPM] Port InitUndef to NPM --- llvm/include/llvm/CodeGen/InitUndef.h | 24 +++++++++ llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/InitUndef.cpp | 50 +++++++++++++------ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/test/CodeGen/AArch64/init-undef.mir | 3 ++ llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 4 +- .../rvv/handle-noreg-with-implicit-def.mir | 2 + .../rvv/subregister-undef-early-clobber.mir | 1 + .../RISCV/rvv/undef-earlyclobber-chain.mir | 1 + 12 files changed, 73 insertions(+), 20 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/InitUndef.h diff --git a/llvm/include/llvm/CodeGen/InitUndef.h b/llvm/include/llvm/CodeGen/InitUndef.h new file mode 100644 index 0000000000000..7274824a74905 --- /dev/null +++ b/llvm/include/llvm/CodeGen/InitUndef.h @@ -0,0 +1,24 @@ +//===- llvm/CodeGen/InitUndef.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_INITUNDEF_H +#define LLVM_CODEGEN_INITUNDEF_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class InitUndefPass : public PassInfoMixin<InitUndefPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_INITUNDEF_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index bff0526d4177a..07dc86c6fccf2 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -311,7 +311,7 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry &); void initializeTwoAddressInstructionLegacyPassPass(PassRegistry &); void initializeTypeBasedAAWrapperPassPass(PassRegistry &); void initializeTypePromotionLegacyPass(PassRegistry &); -void initializeInitUndefPass(PassRegistry &); +void initializeInitUndefLegacyPass(PassRegistry &); void initializeUniformityInfoWrapperPassPass(PassRegistry &); void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &); void initializeUnpackMachineBundlesPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 982bb16e71eab..351ef63af05c0 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -43,6 +43,7 @@ #include "llvm/CodeGen/GlobalMerge.h" #include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/CodeGen/IndirectBrExpand.h" +#include "llvm/CodeGen/InitUndef.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" #include "llvm/CodeGen/JMCInstrumenter.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index c69573ee3ed97..436b26852ce90 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -148,6 +148,7 @@ MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass()) +MACHINE_FUNCTION_PASS("init-undef", InitUndefPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass()) MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass()) @@ -304,7 +305,6 @@ DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass) DUMMY_MACHINE_FUNCTION_PASS("gc-empty-basic-blocks", GCEmptyBasicBlocksPass) DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass) -DUMMY_MACHINE_FUNCTION_PASS("init-undef-pass", InitUndefPass) DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass) DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass) DUMMY_MACHINE_FUNCTION_PASS("kcfi", MachineKCFIPass) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 5250534d8a4e4..aa3591cb6be58 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -54,7 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeIfConverterPass(Registry); initializeImplicitNullChecksPass(Registry); initializeIndirectBrExpandLegacyPassPass(Registry); - initializeInitUndefPass(Registry); + initializeInitUndefLegacyPass(Registry); initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); initializeJMCInstrumenterPass(Registry); diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index 6c0e9f9e930b9..500a73be7c0f5 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -38,6 +38,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/InitUndef.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DetectDeadLanes.h" @@ -59,20 +60,12 @@ using namespace llvm; namespace { -class InitUndef : public MachineFunctionPass { - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - const TargetSubtargetInfo *ST; - const TargetRegisterInfo *TRI; - - // Newly added vregs, assumed to be fully rewritten - SmallSet<Register, 8> NewRegs; - SmallVector<MachineInstr *, 8> DeadInsts; - +class InitUndefLegacy : public MachineFunctionPass { public: static char ID; - InitUndef() : MachineFunctionPass(ID) {} + InitUndefLegacy() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -81,6 +74,20 @@ class InitUndef : public MachineFunctionPass { } StringRef getPassName() const override { return INIT_UNDEF_NAME; } +}; + +class InitUndef { + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + const TargetSubtargetInfo *ST; + const TargetRegisterInfo *TRI; + + // Newly added vregs, assumed to be fully rewritten + SmallSet<Register, 8> NewRegs; + SmallVector<MachineInstr *, 8> DeadInsts; + +public: + bool run(MachineFunction &MF); private: bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, @@ -93,9 +100,9 @@ class InitUndef : public MachineFunctionPass { } // end anonymous namespace -char InitUndef::ID = 0; -INITIALIZE_PASS(InitUndef, DEBUG_TYPE, INIT_UNDEF_NAME, false, false) -char &llvm::InitUndefID = InitUndef::ID; +char InitUndefLegacy::ID = 0; +INITIALIZE_PASS(InitUndefLegacy, DEBUG_TYPE, INIT_UNDEF_NAME, false, false) +char &llvm::InitUndefID = InitUndefLegacy::ID; static bool isEarlyClobberMI(MachineInstr &MI) { return llvm::any_of(MI.all_defs(), [](const MachineOperand &DefMO) { @@ -246,7 +253,20 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, return Changed; } -bool InitUndef::runOnMachineFunction(MachineFunction &MF) { +bool InitUndefLegacy::runOnMachineFunction(MachineFunction &MF) { + return InitUndef().run(MF); +} + +PreservedAnalyses InitUndefPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + if (!InitUndef().run(MF)) + return PreservedAnalyses::all(); + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +bool InitUndef::run(MachineFunction &MF) { ST = &MF.getSubtarget(); // The pass is only needed if early-clobber defs and undef ops cannot be diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7740f622ede7c..5f7ce13ad8a3e 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -103,6 +103,7 @@ #include "llvm/CodeGen/GlobalMergeFunctions.h" #include "llvm/CodeGen/HardwareLoops.h" #include "llvm/CodeGen/IndirectBrExpand.h" +#include "llvm/CodeGen/InitUndef.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/InterleavedLoadCombine.h" #include "llvm/CodeGen/JMCInstrumenter.h" diff --git a/llvm/test/CodeGen/AArch64/init-undef.mir b/llvm/test/CodeGen/AArch64/init-undef.mir index c9d23006d3523..cee41c053251d 100644 --- a/llvm/test/CodeGen/AArch64/init-undef.mir +++ b/llvm/test/CodeGen/AArch64/init-undef.mir @@ -2,6 +2,9 @@ # RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=false -run-pass=init-undef -o - %s | FileCheck %s # RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=true -run-pass=init-undef -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=false -passes=init-undef -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-- -aarch64-enable-subreg-liveness-tracking=true -passes=init-undef -o - %s | FileCheck %s + --- name: test_stxp_undef body: | diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index 91c15565762de..e00b7ff83e322 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -9,9 +9,9 @@ ; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(lower-switch,lower-invoke,UnreachableBlockElimPass,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,fentry-insert,xray-instrumentation,patchable-function,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) -; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) +; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,early-cse<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) -; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,InitUndefPass,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) +; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,pre-isel-intrinsic-lowering,function(expand-large-div-rem,expand-fp),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(infer-address-spaces,amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,verify,loop-mssa(loop-reduce),mergeicmps,expand-memcmp,gc-lowering,lower-constant-intrinsics,UnreachableBlockElimPass,consthoist,ReplaceWithVeclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,ExpandReductionsPass,gvn<>,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,cgscc(function(codegenprepare,load-store-vectorizer,lower-switch,lower-invoke,UnreachableBlockElimPass,flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions,detect-dead-lanes,dead-mi-elimination,init-undef,ProcessImplicitDefsPass,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,PostRAMachineSinkingPass,shrink-wrap,prolog-epilog,branch-folder,tailduplication,machine-latecleanup,machine-cp,post-ra-pseudos,postmisched,block-placement,fentry-insert,xray-instrumentation,patchable-function,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,AMDGPUWaitSGPRHazardsPass,amdgpu-insert-delay-alu,branch-relaxation,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,stack-frame-layout,verify),invalidate<machine-function-info>)) define void @empty() { ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir b/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir index 7b4d200ef8a3b..8cc89ed5af104 100644 --- a/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir +++ b/llvm/test/CodeGen/RISCV/rvv/handle-noreg-with-implicit-def.mir @@ -1,5 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 # RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -run-pass=init-undef -o - %s | FileCheck %s --check-prefix=MIR + +# RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -passes=init-undef -o - %s | FileCheck %s --check-prefix=MIR ... --- name: vrgather_all_undef diff --git a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir index ed274cf49fa9b..31d0996852b76 100644 --- a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir +++ b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -mtriple=riscv64 -mattr=+v -enable-subreg-liveness -run-pass=init-undef -o - | FileCheck %s +# RUN: llc %s -mtriple=riscv64 -mattr=+v -enable-subreg-liveness -passes=init-undef -o - | FileCheck %s ... --- diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir index 9c23fd9902bb8..98cabe314917c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=riscv32 -mattr=+v -enable-subreg-liveness -run-pass init-undef -run-pass machineverifier %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv32 -mattr=+v -enable-subreg-liveness -passes=init-undef,verify %s -o - | FileCheck %s --- | source_filename = "<stdin>" >From 9cf42547dc284b2281d1f358df41b0ff5e8c9709 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Mon, 5 May 2025 09:24:23 +0000 Subject: [PATCH 2/2] fix comment --- llvm/include/llvm/CodeGen/InitUndef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/InitUndef.h b/llvm/include/llvm/CodeGen/InitUndef.h index 7274824a74905..be1cf4bfc9872 100644 --- a/llvm/include/llvm/CodeGen/InitUndef.h +++ b/llvm/include/llvm/CodeGen/InitUndef.h @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/InitUndef.h --------------------*- C++ -*-===// +//===- llvm/CodeGen/InitUndef.h ---------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits