llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

Previously we handled the inverse situation only.

---

Patch is 34.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153022.diff

3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+191-112)
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir (+9-68)
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll (+49-107)


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 5206f32ec99e5..b71c70db5e6b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -14,6 +14,10 @@
 /// MFMA opcode.
 ///
 /// TODO:
+/// - Handle rewrites of phis. This must be more careful than normal about the
+///   reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of
+///   a loop, so it depends on the exact assignment of the copy.
+///
 /// - Update LiveIntervals incrementally instead of recomputing from scratch
 ///
 //===----------------------------------------------------------------------===//
@@ -60,6 +64,32 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
     return TII.isMAI(MI) &&
            AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
   }
 
+  /// Find AV_* registers assigned to AGPRs (or virtual registers which were
+  /// already required to be AGPR).
+  ///
+  /// \return the assigned physical register that \p VReg is assigned to if it
+  /// is an AGPR, otherwise MCRegister().
+  MCRegister getAssignedAGPR(Register VReg) const {
+    MCRegister PhysReg = VRM.getPhys(VReg);
+    if (!PhysReg)
+      return MCRegister();
+
+    const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
+    if (!TRI.hasAGPRs(VirtRegRC))
+      return MCRegister();
+
+    if (!TRI.hasVGPRs(VirtRegRC))
+      return PhysReg;
+
+    // If this is an AV register, we have to check if the actual assignment is
+    // to an AGPR
+    const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
+    return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
+  }
+
+  bool tryReassigningMFMAChain(MachineInstr &MFMA, unsigned HintOpIdx,
+                               MCPhysReg PhysRegHint) const;
+
   /// Compute the register class constraints based on the uses of \p Reg,
   /// excluding MFMA uses from which can be rewritten to change the register
   /// class constraint. This should be nearly identical to
@@ -74,6 +104,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
       Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
       SmallSetVector<Register, 4> &RewriteRegs) const;
 
+  bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
+  bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
   bool run(MachineFunction &MF) const;
 };
 
@@ -152,6 +184,88 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
   return true;
 }
 
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
+    MachineInstr &MFMA, unsigned HintOpIdx, MCPhysReg PhysRegHint) const {
+  // src2 and dst have the same physical class constraint; try to preserve
+  // the original src2 subclass if one were to exist.
+  SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
+  SmallSetVector<Register, 4> RewriteRegs;
+
+  Register MFMAHintReg = MFMA.getOperand(HintOpIdx).getReg();
+  // Make sure we reassign the MFMA we found the copy from first. We want
+  // to ensure dst ends up in the physreg we were originally copying to.
+  RewriteRegs.insert(MFMAHintReg);
+
+  // We've found av = COPY (MFMA), and need to verify that we can trivially
+  // rewrite src2 to use the new AGPR. If we can't trivially replace it,
+  // we're going to induce as many copies as we would have emitted in the
+  // first place, as well as need to assign another register, and need to
+  // figure out where to put them. The live range splitting is smarter than
+  // anything we're doing here, so trust it did something reasonable.
+  //
+  // Note recomputeRegClassExceptRewritable will consider the constraints of
+  // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
+  if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
+                                         RewriteRegs)) {
+    LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
+                      << printReg(MFMAHintReg, &TRI) << '\n');
+    return false;
+  }
+
+  // If src2 and dst are different registers, we need to also reassign the
+  // input to an available AGPR if it is compatible with all other uses.
+  //
+  // If we can't reassign it, we'd need to introduce a different copy
+  // which is likely worse than the copy we'd be saving.
+  //
+  // It's likely that the MFMA is used in sequence with other MFMAs; if we
+  // cannot migrate the full use/def chain of MFMAs, we would need to
+  // introduce intermediate copies somewhere. So we only make the
+  // transform if all the interfering MFMAs can also be migrated. Collect
+  // the set of rewritable MFMAs and check if we can assign an AGPR at
+  // that point.
+  //
+  // If any of the MFMAs aren't reassignable, we give up and rollback to
+  // the original register assignments.
+
+  using RecoloringStack =
+      SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
+  RecoloringStack TentativeReassignments;
+
+  for (Register RewriteReg : RewriteRegs) {
+    LiveInterval &LI = LIS.getInterval(RewriteReg);
+    TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
+    LRM.unassign(LI);
+  }
+
+  if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
+    // Roll back the register assignments to the original state.
+    for (auto [LI, OldAssign] : TentativeReassignments) {
+      if (VRM.hasPhys(LI->reg()))
+        LRM.unassign(*LI);
+      LRM.assign(*LI, OldAssign);
+    }
+
+    return false;
+  }
+
+  // Fixup the register classes of the virtual registers now that we've
+  // committed to the reassignments.
+  for (Register InterferingReg : RewriteRegs) {
+    const TargetRegisterClass *EquivalentAGPRRegClass =
+        TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
+    MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
+  }
+
+  for (MachineInstr *RewriteCandidate : RewriteCandidates) {
+    int NewMFMAOp =
+        AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
+    RewriteCandidate->setDesc(TII.get(NewMFMAOp));
+  }
+
+  return true;
+}
+
 /// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
 /// preference to use \p PhysReg first. Returns false if the reassignments
 /// cannot be trivially performed.
@@ -204,6 +318,78 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
   return true;
 }
 
+/// Identify copies that look like:
+///   %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
+///   %agpr = COPY %vgpr
+///
+/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
+/// versions of the MFMA. This should cover the common case.
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
+    Register VReg, MCRegister AssignedAGPR) const {
+  bool MadeChange = false;
+  for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
+    if (!UseMI.isCopy())
+      continue;
+
+    Register CopySrcReg = UseMI.getOperand(1).getReg();
+    if (!CopySrcReg.isVirtual())
+      continue;
+
+    // TODO: Handle loop phis copied to AGPR. e.g.
+    //
+    // loop:
+    //   %phi:vgpr = COPY %mfma:vgpr
+    //   %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
+    //   s_cbranch_vccnz loop
+    //
+    // endloop:
+    //   %agpr = mfma
+    //
+    // We need to be sure that %phi is assigned to the same physical register
+    // as %mfma, or else we will just be moving copies into the loop.
+
+    for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
+      if (isRewriteCandidate(CopySrcDefMI) &&
+          tryReassigningMFMAChain(CopySrcDefMI, 0, AssignedAGPR))
+        MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
+/// Identify copies that look like:
+///   %src:vgpr = COPY %src:agpr
+///   %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
+///
+/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
+/// versions of the MFMA. This should cover rarer cases, and will generally be
+/// redundant with tryFoldCopiesToAGPR.
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
+    Register VReg, MCRegister AssignedAGPR) const {
+  bool MadeChange = false;
+  for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
+    if (!UseMI.isCopy())
+      continue;
+
+    Register CopyDstReg = UseMI.getOperand(0).getReg();
+    if (!CopyDstReg.isVirtual())
+      continue;
+
+    for (MachineInstr &CopyUseMI : MRI.use_instructions(CopyDstReg)) {
+      if (isRewriteCandidate(CopyUseMI)) {
+        const MachineOperand *Op =
+            CopyUseMI.findRegisterUseOperand(CopyDstReg, /*TRI=*/nullptr);
+        if (tryReassigningMFMAChain(CopyUseMI, Op->getOperandNo(),
+                                    VRM.getPhys(Op->getReg())))
+          MadeChange = true;
+      }
+    }
+  }
+
+  return MadeChange;
+}
+
 bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
   // This only applies on subtargets that have a configurable AGPR vs. VGPR
   // allocation.
@@ -220,121 +406,14 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
 
   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
     Register VReg = Register::index2VirtReg(I);
-    Register PhysReg = VRM.getPhys(VReg);
-    if (!PhysReg)
-      continue;
-
-    // Find AV_* registers assigned to AGPRs.
-    const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
-    if (!TRI.hasAGPRs(VirtRegRC))
+    MCRegister AssignedAGPR = getAssignedAGPR(VReg);
+    if (!AssignedAGPR)
       continue;
 
-    const TargetRegisterClass *AssignedRC = VirtRegRC;
-    if (TRI.hasVGPRs(VirtRegRC)) {
-      // If this is an AV register, we have to check if the actual assignment is
-      // to an AGPR
-      AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
-      if (!TRI.isAGPRClass(AssignedRC))
-        continue;
-    }
-
-    LiveInterval &LI = LIS.getInterval(VReg);
-
-    for (VNInfo *VNI : LI.vnis()) {
-      MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
-      if (!DefMI || !DefMI->isCopy())
-        continue;
-
-      Register MFMADstReg = DefMI->getOperand(1).getReg();
-      if (!MFMADstReg.isVirtual())
-        continue;
-
-      LiveInterval &CopySrcLI = LIS.getInterval(MFMADstReg);
-      LiveQueryResult LRQ = CopySrcLI.Query(VNI->def.getRegSlot());
-      MachineInstr *MFMA = LIS.getInstructionFromIndex(LRQ.valueIn()->def);
-      if (!MFMA || !isRewriteCandidate(*MFMA))
-        continue;
-
-      // src2 and dst have the same physical class constraint; try to preserve
-      // the original src2 subclass if one were to exist.
-      SmallVector<MachineInstr *, 4> RewriteCandidates = {MFMA};
-      SmallSetVector<Register, 4> RewriteRegs;
-
-      // Make sure we reassign the MFMA we found the copy from first. We want
-      // to ensure dst ends up in the physreg we were originally copying to.
-      RewriteRegs.insert(MFMADstReg);
-
-      // We've found av = COPY (MFMA), and need to verify that we can trivially
-      // rewrite src2 to use the new AGPR. If we can't trivially replace it,
-      // we're going to induce as many copies as we would have emitted in the
-      // first place, as well as need to assign another register, and need to
-      // figure out where to put them. The live range splitting is smarter than
-      // anything we're doing here, so trust it did something reasonable.
-      //
-      // Note recomputeRegClassExceptRewritable will consider the constraints of
-      // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
-      if (!recomputeRegClassExceptRewritable(MFMADstReg, RewriteCandidates,
-                                             RewriteRegs)) {
-        LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
-                          << printReg(MFMADstReg, &TRI) << '\n');
-        continue;
-      }
-
-      // If src2 and dst are different registers, we need to also reassign the
-      // input to an available AGPR if it is compatible with all other uses.
-      //
-      // If we can't reassign it, we'd need to introduce a different copy
-      // which is likely worse than the copy we'd be saving.
-      //
-      // It's likely that the MFMA is used in sequence with other MFMAs; if we
-      // cannot migrate the full use/def chain of MFMAs, we would need to
-      // introduce intermediate copies somewhere. So we only make the
-      // transform if all the interfering MFMAs can also be migrated. Collect
-      // the set of rewritable MFMAs and check if we can assign an AGPR at
-      // that point.
-      //
-      // If any of the MFMAs aren't reassignable, we give up and rollback to
-      // the original register assignments.
-
-      using RecoloringStack =
-          SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
-      RecoloringStack TentativeReassignments;
-
-      for (Register RewriteReg : RewriteRegs) {
-        LiveInterval &LI = LIS.getInterval(RewriteReg);
-        TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
-        LRM.unassign(LI);
-      }
-
-      if (!attemptReassignmentsToAGPR(RewriteRegs, PhysReg)) {
-        // Roll back the register assignments to the original state.
-        for (auto [LI, OldAssign] : TentativeReassignments) {
-          if (VRM.hasPhys(LI->reg()))
-            LRM.unassign(*LI);
-          LRM.assign(*LI, OldAssign);
-        }
-
-        continue;
-      }
-
-      // Fixup the register classes of the virtual registers now that we've
-      // committed to the reassignments.
-      for (Register InterferingReg : RewriteRegs) {
-        const TargetRegisterClass *EquivalentAGPRRegClass =
-            TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
-        MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
-      }
-
-      for (MachineInstr *RewriteCandidate : RewriteCandidates) {
-        int NewMFMAOp =
-            AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
-        RewriteCandidate->setDesc(TII.get(NewMFMAOp));
-      }
-
-      // We likely left an identity copy behind after assignment; let
-      // VirtRegRewriter deal with it later.
+    if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
+      MadeChange = true;
+    if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
       MadeChange = true;
-    }
   }
 
   return MadeChange;
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
index 7fdc8c0d8019b..632401b6128c5 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
@@ -69,9 +69,9 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
-    ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
     ; CHECK-NEXT: SI_RETURN
     %0:vreg_64_align2 = COPY $vgpr4_vgpr5
     %1:av_64_align2 = COPY $vgpr0_vgpr1
@@ -97,8 +97,8 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
     ; CHECK-NEXT: SI_RETURN
     %0:vreg_64_align2 = COPY $vgpr4_vgpr5
@@ -126,10 +126,10 @@ body: |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
-    ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
-    ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+    ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:areg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
+    ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
     ; CHECK-NEXT: SI_RETURN
     %0:vreg_64_align2 = COPY $vgpr4_vgpr5
     %1:av_64_align2 = COPY $vgpr0_vgpr1
@@ -200,62 +200,3 @@ body: |
     GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
     SI_RETURN
 ...
-
-# Degenerate case. Copy from AGPR to VGPR is dead undef subreg def
----
-name: test_rewrite_mfma_copy_from_agpr_undef_vdst_subreg_use_imm_src2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-
-    ; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_undef_vdst_subreg_use_imm_src2
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
-    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    ; CHECK-NEXT: dead [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
-    ; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
-    ; CHECK-NEXT: SI_RETURN
-    %0:vreg_64_align2 = COPY $vgpr4_vgpr5
-    %1:av_64_align2 = COPY $vgpr0_vgpr1
-    %2:av_64_align2 = COPY $vgpr2_vgpr3
-    %3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    %4:vreg_128_align2 = COPY %3
-    undef %4.sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, 0, 0, 0, 0, implicit $mode, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
-    SI_RETURN
-...
-
-# Degenerate case. Copy from AGPR to VGPR is dead, but same register
-# is redefined as whole register.
----
-name... [truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/153022
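
For readers skimming the patch, the heart of `tryReassigningMFMAChain` is a tentative-recoloring scheme: record every register's current assignment on a stack, vacate the whole set, try to re-place each member in the AGPR bank (hint first), and restore the saved assignments if anything fails. Below is a minimal, self-contained C++ sketch of that unassign/attempt/rollback pattern. All names here (`Allocation`, `tryReassignAll`, `AgprBank`) are simplified stand-ins invented for illustration; this is not the LLVM `VirtRegMap`/`LiveRegMatrix` API.

```cpp
#include <cstdint>
#include <iostream>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>

using VirtReg = uint32_t;
using PhysReg = uint32_t; // 0 plays the role of "no register" (MCRegister())

// Toy stand-in for VirtRegMap + LiveRegMatrix: tracks which physical
// register each virtual register occupies.
class Allocation {
  std::unordered_map<VirtReg, PhysReg> Map;
  std::set<PhysReg> InUse;

public:
  PhysReg getPhys(VirtReg V) const {
    auto It = Map.find(V);
    return It == Map.end() ? 0 : It->second;
  }
  void assign(VirtReg V, PhysReg P) {
    Map[V] = P;
    InUse.insert(P);
  }
  void unassign(VirtReg V) {
    auto It = Map.find(V);
    if (It != Map.end()) {
      InUse.erase(It->second);
      Map.erase(It);
    }
  }
  bool isFree(PhysReg P) const { return !InUse.count(P); }
};

// Try to move every register in Regs into the AGPR-like bank, steering the
// first one toward Hint. On any failure, roll the allocation back to its
// original state and report false, like the RecoloringStack in the patch.
bool tryReassignAll(Allocation &Alloc, const std::vector<VirtReg> &Regs,
                    const std::vector<PhysReg> &AgprBank, PhysReg Hint) {
  // Tentatively vacate the whole set, remembering the old assignments.
  std::vector<std::pair<VirtReg, PhysReg>> Saved;
  for (VirtReg V : Regs) {
    Saved.push_back({V, Alloc.getPhys(V)});
    Alloc.unassign(V);
  }

  bool UsedHint = false;
  for (VirtReg V : Regs) {
    PhysReg Chosen = 0;
    if (!UsedHint && Hint && Alloc.isFree(Hint)) {
      Chosen = Hint; // the register reached through the copy goes first
      UsedHint = true;
    } else {
      for (PhysReg P : AgprBank) {
        if (Alloc.isFree(P)) {
          Chosen = P;
          break;
        }
      }
    }
    if (!Chosen) {
      // Roll back: drop any tentative placements, restore the originals.
      for (VirtReg W : Regs)
        Alloc.unassign(W);
      for (auto [W, Old] : Saved)
        if (Old)
          Alloc.assign(W, Old);
      return false;
    }
    Alloc.assign(V, Chosen);
  }
  return true;
}

int main() {
  Allocation Alloc;
  Alloc.assign(1, 100); // %1 -> VGPR-like reg 100
  Alloc.assign(2, 101); // %2 -> VGPR-like reg 101

  bool Ok = tryReassignAll(Alloc, {1, 2}, /*AgprBank=*/{200, 201},
                           /*Hint=*/201);
  std::cout << "reassigned: " << Ok << ", %1 -> " << Alloc.getPhys(1)
            << ", %2 -> " << Alloc.getPhys(2) << '\n';
}
```

Note how the hinted physical register is consumed first: this mirrors the patch's comment that the MFMA found through the copy must be reassigned before the rest of the chain, so dst lands in the physreg the copy originally targeted.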