llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

Previously we handled only the inverse situation, where the MFMA result is copied to an AGPR; this patch also handles an MFMA input that is copied from an AGPR (see the sketch below).
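
To make the two patterns concrete, here is a minimal MIR-style sketch (register names and classes are illustrative, adapted from the pass's doc comments and the tests below, not taken from a real function):

```mir
; Already handled: the MFMA result is copied to an AGPR.
%dst:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %a, %b, %src2, 0, 0, 0, implicit $mode, implicit $exec
%acc:areg_64_align2 = COPY %dst

; Newly handled: the src2 input is itself a copy from an AGPR (%acc).
%src2:vreg_64_align2 = COPY %acc
%dst:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %a, %b, %src2, 0, 0, 0, implicit $mode, implicit $exec
```

In both cases the pass tries to migrate the MFMA and its transitive MFMA users to the AGPR form (here `V_MFMA_F64_4X4X4F64_e64` with `areg` classes), so the remaining copy becomes an identity copy that VirtRegRewriter can delete.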

---

Patch is 34.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153022.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+191-112) 
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir (+9-68) 
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll (+49-107) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 5206f32ec99e5..b71c70db5e6b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -14,6 +14,10 @@
 /// MFMA opcode.
 ///
 /// TODO:
+/// - Handle rewrites of phis. This must be more careful than normal about the
+///   reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
+///   loop, so it depends on the exact assignment of the copy.
+///
 ///  - Update LiveIntervals incrementally instead of recomputing from scratch
 ///
 //===----------------------------------------------------------------------===//
@@ -60,6 +64,32 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
     return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
   }
 
+  /// Find AV_* registers assigned to AGPRs (or virtual registers which were
+  /// already required to be AGPR).
+  ///
+  /// \return the assigned physical register that \p VReg is assigned to if it
+  /// is an AGPR, otherwise MCRegister().
+  MCRegister getAssignedAGPR(Register VReg) const {
+    MCRegister PhysReg = VRM.getPhys(VReg);
+    if (!PhysReg)
+      return MCRegister();
+
+    const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
+    if (!TRI.hasAGPRs(VirtRegRC))
+      return MCRegister();
+
+    if (!TRI.hasVGPRs(VirtRegRC))
+      return PhysReg;
+
+    // If this is an AV register, we have to check if the actual assignment is
+    // to an AGPR
+    const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
+    return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
+  }
+
+  bool tryReassigningMFMAChain(MachineInstr &MFMA, unsigned HintOpIdx,
+                               MCPhysReg PhysRegHint) const;
+
   /// Compute the register class constraints based on the uses of \p Reg,
   /// excluding MFMA uses from which can be rewritten to change the register
   /// class constraint. This should be nearly identical to
@@ -74,6 +104,8 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
       Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
       SmallSetVector<Register, 4> &RewriteRegs) const;
 
+  bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
+  bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
   bool run(MachineFunction &MF) const;
 };
 
@@ -152,6 +184,88 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
   return true;
 }
 
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
+    MachineInstr &MFMA, unsigned HintOpIdx, MCPhysReg PhysRegHint) const {
+  // src2 and dst have the same physical class constraint; try to preserve
+  // the original src2 subclass if one were to exist.
+  SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
+  SmallSetVector<Register, 4> RewriteRegs;
+
+  Register MFMAHintReg = MFMA.getOperand(HintOpIdx).getReg();
+  // Make sure we reassign the MFMA we found the copy from first. We want
+  // to ensure dst ends up in the physreg we were originally copying to.
+  RewriteRegs.insert(MFMAHintReg);
+
+  // We've found av = COPY (MFMA), and need to verify that we can trivially
+  // rewrite src2 to use the new AGPR. If we can't trivially replace it,
+  // we're going to induce as many copies as we would have emitted in the
+  // first place, as well as need to assign another register, and need to
+  // figure out where to put them. The live range splitting is smarter than
+  // anything we're doing here, so trust it did something reasonable.
+  //
+  // Note recomputeRegClassExceptRewritable will consider the constraints of
+  // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
+  if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
+                                         RewriteRegs)) {
+    LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
+                      << printReg(MFMAHintReg, &TRI) << '\n');
+    return false;
+  }
+
+  // If src2 and dst are different registers, we need to also reassign the
+  // input to an available AGPR if it is compatible with all other uses.
+  //
+  // If we can't reassign it, we'd need to introduce a different copy
+  // which is likely worse than the copy we'd be saving.
+  //
+  // It's likely that the MFMA is used in sequence with other MFMAs; if we
+  // cannot migrate the full use/def chain of MFMAs, we would need to
+  // introduce intermediate copies somewhere. So we only make the
+  // transform if all the interfering MFMAs can also be migrated. Collect
+  // the set of rewritable MFMAs and check if we can assign an AGPR at
+  // that point.
+  //
+  // If any of the MFMAs aren't reassignable, we give up and rollback to
+  // the original register assignments.
+
+  using RecoloringStack =
+      SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
+  RecoloringStack TentativeReassignments;
+
+  for (Register RewriteReg : RewriteRegs) {
+    LiveInterval &LI = LIS.getInterval(RewriteReg);
+    TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
+    LRM.unassign(LI);
+  }
+
+  if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
+    // Roll back the register assignments to the original state.
+    for (auto [LI, OldAssign] : TentativeReassignments) {
+      if (VRM.hasPhys(LI->reg()))
+        LRM.unassign(*LI);
+      LRM.assign(*LI, OldAssign);
+    }
+
+    return false;
+  }
+
+  // Fixup the register classes of the virtual registers now that we've
+  // committed to the reassignments.
+  for (Register InterferingReg : RewriteRegs) {
+    const TargetRegisterClass *EquivalentAGPRRegClass =
+        TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
+    MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
+  }
+
+  for (MachineInstr *RewriteCandidate : RewriteCandidates) {
+    int NewMFMAOp =
+        AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
+    RewriteCandidate->setDesc(TII.get(NewMFMAOp));
+  }
+
+  return true;
+}
+
 /// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
 /// preference to use \p PhysReg first. Returns false if the reassignments
 /// cannot be trivially performed.
@@ -204,6 +318,78 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
   return true;
 }
 
+/// Identify copies that look like:
+/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
+/// %agpr = COPY %vgpr
+///
+/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
+/// versions of the MFMA. This should cover the common case.
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
+    Register VReg, MCRegister AssignedAGPR) const {
+  bool MadeChange = false;
+  for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
+    if (!UseMI.isCopy())
+      continue;
+
+    Register CopySrcReg = UseMI.getOperand(1).getReg();
+    if (!CopySrcReg.isVirtual())
+      continue;
+
+    // TODO: Handle loop phis copied to AGPR. e.g.
+    //
+    // loop:
+    //   %phi:vgpr = COPY %mfma:vgpr
+    //   %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
+    //   s_cbranch_vccnz loop
+    //
+    // endloop:
+    //   %agpr = mfma
+    //
+    // We need to be sure that %phi is assigned to the same physical register as
+    // %mfma, or else we will just be moving copies into the loop.
+
+    for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
+      if (isRewriteCandidate(CopySrcDefMI) &&
+          tryReassigningMFMAChain(CopySrcDefMI, 0, AssignedAGPR))
+        MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
+/// Identify copies that look like:
+/// %src:vgpr = COPY %src:agpr
+/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
+///
+/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
+/// versions of the MFMA. This should cover rarer cases, and will generally be
+/// redundant with tryFoldCopiesToAGPR.
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
+    Register VReg, MCRegister AssignedAGPR) const {
+  bool MadeChange = false;
+  for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
+    if (!UseMI.isCopy())
+      continue;
+
+    Register CopyDstReg = UseMI.getOperand(0).getReg();
+    if (!CopyDstReg.isVirtual())
+      continue;
+
+    for (MachineInstr &CopyUseMI : MRI.use_instructions(CopyDstReg)) {
+      if (isRewriteCandidate(CopyUseMI)) {
+        const MachineOperand *Op =
+            CopyUseMI.findRegisterUseOperand(CopyDstReg, /*TRI=*/nullptr);
+        if (tryReassigningMFMAChain(CopyUseMI, Op->getOperandNo(),
+                                    VRM.getPhys(Op->getReg())))
+          MadeChange = true;
+      }
+    }
+  }
+
+  return MadeChange;
+}
+
 bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
   // This only applies on subtargets that have a configurable AGPR vs. VGPR
   // allocation.
@@ -220,121 +406,14 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
 
   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
     Register VReg = Register::index2VirtReg(I);
-    Register PhysReg = VRM.getPhys(VReg);
-    if (!PhysReg)
-      continue;
-
-    // Find AV_* registers assigned to AGPRs.
-    const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
-    if (!TRI.hasAGPRs(VirtRegRC))
+    MCRegister AssignedAGPR = getAssignedAGPR(VReg);
+    if (!AssignedAGPR)
       continue;
 
-    const TargetRegisterClass *AssignedRC = VirtRegRC;
-    if (TRI.hasVGPRs(VirtRegRC)) {
-      // If this is an AV register, we have to check if the actual assignment is
-      // to an AGPR
-      AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
-      if (!TRI.isAGPRClass(AssignedRC))
-        continue;
-    }
-
-    LiveInterval &LI = LIS.getInterval(VReg);
-
-    for (VNInfo *VNI : LI.vnis()) {
-      MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
-      if (!DefMI || !DefMI->isCopy())
-        continue;
-
-      Register MFMADstReg = DefMI->getOperand(1).getReg();
-      if (!MFMADstReg.isVirtual())
-        continue;
-
-      LiveInterval &CopySrcLI = LIS.getInterval(MFMADstReg);
-      LiveQueryResult LRQ = CopySrcLI.Query(VNI->def.getRegSlot());
-      MachineInstr *MFMA = LIS.getInstructionFromIndex(LRQ.valueIn()->def);
-      if (!MFMA || !isRewriteCandidate(*MFMA))
-        continue;
-
-      // src2 and dst have the same physical class constraint; try to preserve
-      // the original src2 subclass if one were to exist.
-      SmallVector<MachineInstr *, 4> RewriteCandidates = {MFMA};
-      SmallSetVector<Register, 4> RewriteRegs;
-
-      // Make sure we reassign the MFMA we found the copy from first. We want
-      // to ensure dst ends up in the physreg we were originally copying to.
-      RewriteRegs.insert(MFMADstReg);
-
-      // We've found av = COPY (MFMA), and need to verify that we can trivially
-      // rewrite src2 to use the new AGPR. If we can't trivially replace it,
-      // we're going to induce as many copies as we would have emitted in the
-      // first place, as well as need to assign another register, and need to
-      // figure out where to put them. The live range splitting is smarter than
-      // anything we're doing here, so trust it did something reasonable.
-      //
-      // Note recomputeRegClassExceptRewritable will consider the constraints of
-      // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
-      if (!recomputeRegClassExceptRewritable(MFMADstReg, RewriteCandidates,
-                                             RewriteRegs)) {
-        LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
-                          << printReg(MFMADstReg, &TRI) << '\n');
-        continue;
-      }
-
-      // If src2 and dst are different registers, we need to also reassign the
-      // input to an available AGPR if it is compatible with all other uses.
-      //
-      // If we can't reassign it, we'd need to introduce a different copy
-      // which is likely worse than the copy we'd be saving.
-      //
-      // It's likely that the MFMA is used in sequence with other MFMAs; if we
-      // cannot migrate the full use/def chain of MFMAs, we would need to
-      // introduce intermediate copies somewhere. So we only make the
-      // transform if all the interfering MFMAs can also be migrated. Collect
-      // the set of rewritable MFMAs and check if we can assign an AGPR at
-      // that point.
-      //
-      // If any of the MFMAs aren't reassignable, we give up and rollback to
-      // the original register assignments.
-
-      using RecoloringStack =
-          SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
-      RecoloringStack TentativeReassignments;
-
-      for (Register RewriteReg : RewriteRegs) {
-        LiveInterval &LI = LIS.getInterval(RewriteReg);
-        TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
-        LRM.unassign(LI);
-      }
-
-      if (!attemptReassignmentsToAGPR(RewriteRegs, PhysReg)) {
-        // Roll back the register assignments to the original state.
-        for (auto [LI, OldAssign] : TentativeReassignments) {
-          if (VRM.hasPhys(LI->reg()))
-            LRM.unassign(*LI);
-          LRM.assign(*LI, OldAssign);
-        }
-
-        continue;
-      }
-
-      // Fixup the register classes of the virtual registers now that we've
-      // committed to the reassignments.
-      for (Register InterferingReg : RewriteRegs) {
-        const TargetRegisterClass *EquivalentAGPRRegClass =
-            TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
-        MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
-      }
-
-      for (MachineInstr *RewriteCandidate : RewriteCandidates) {
-        int NewMFMAOp =
-            AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
-        RewriteCandidate->setDesc(TII.get(NewMFMAOp));
-      }
-
-      // We likely left an identity copy behind after assignment; let
-      // VirtRegRewriter deal with it later.
+    if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
+      MadeChange = true;
+    if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
       MadeChange = true;
-    }
   }
 
   return MadeChange;
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
index 7fdc8c0d8019b..632401b6128c5 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
@@ -69,9 +69,9 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
     ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
-    ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
     ; CHECK-NEXT: SI_RETURN
     %0:vreg_64_align2 = COPY $vgpr4_vgpr5
     %1:av_64_align2 = COPY $vgpr0_vgpr1
@@ -97,8 +97,8 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
     ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub2_sub3, 0, 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
     ; CHECK-NEXT: SI_RETURN
     %0:vreg_64_align2 = COPY $vgpr4_vgpr5
@@ -126,10 +126,10 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
     ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:areg_64_align2 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (s64), addrspace 1)
-    ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:vreg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
-    ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
+    ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:areg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub1:areg_64_align2 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1
+    ; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]], 0, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[V_MFMA_F64_4X4X4F64_e64_]], 0, 0, implicit $exec :: (store (s64), addrspace 1)
     ; CHECK-NEXT: SI_RETURN
     %0:vreg_64_align2 = COPY $vgpr4_vgpr5
     %1:av_64_align2 = COPY $vgpr0_vgpr1
@@ -200,62 +200,3 @@ body:             |
     GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
     SI_RETURN
 ...
-
-# Degenerate case. Copy from AGPR to VGPR is dead undef subreg def
----
-name:  test_rewrite_mfma_copy_from_agpr_undef_vdst_subreg_use_imm_src2
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-
-    ; CHECK-LABEL: name: test_rewrite_mfma_copy_from_agpr_undef_vdst_subreg_use_imm_src2
-    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
-    ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    ; CHECK-NEXT: dead [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
-    ; CHECK-NEXT: undef [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec
-    ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
-    ; CHECK-NEXT: SI_RETURN
-    %0:vreg_64_align2 = COPY $vgpr4_vgpr5
-    %1:av_64_align2 = COPY $vgpr0_vgpr1
-    %2:av_64_align2 = COPY $vgpr2_vgpr3
-    %3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    %4:vreg_128_align2 = COPY %3
-    undef %4.sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, 0, 0, 0, 0, implicit $mode, implicit $exec
-    GLOBAL_STORE_DWORDX4 %0, %4, 0, 0, implicit $exec :: (store (s128), addrspace 1)
-    SI_RETURN
-    SI_RETURN
-...
-
-# Degenerate case. Copy from AGPR to VGPR is dead, but same register
-# is redefined as whole register.
----
-name...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/153022