llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

Handle a special case for copies from AGPR VGPR on the MFMA inputs.
If the "input" is really a subregister def, we will not see the
usual copy to VGPR for src2, only the read of the subregister def.
Not sure if this pattern appears in practice.

---
Full diff: https://github.com/llvm/llvm-project/pull/153023.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp (+6-5) 
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir 
(+2-2) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index b71c70db5e6b3..4e0d64a20690e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -375,13 +375,14 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
     Register CopyDstReg = UseMI.getOperand(0).getReg();
     if (!CopyDstReg.isVirtual())
       continue;
+    for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
+      if (!CopyUseMO.readsReg())
+        continue;
 
-    for (MachineInstr &CopyUseMI : MRI.use_instructions(CopyDstReg)) {
+      MachineInstr &CopyUseMI = *CopyUseMO.getParent();
       if (isRewriteCandidate(CopyUseMI)) {
-        const MachineOperand *Op =
-            CopyUseMI.findRegisterUseOperand(CopyDstReg, /*TRI=*/nullptr);
-        if (tryReassigningMFMAChain(CopyUseMI, Op->getOperandNo(),
-                                    VRM.getPhys(Op->getReg())))
+        if (tryReassigningMFMAChain(CopyUseMI, CopyUseMO.getOperandNo(),
+                                    VRM.getPhys(CopyUseMO.getReg())))
           MadeChange = true;
       }
     }
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir 
b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
index 632401b6128c5..17a72110767bb 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir
@@ -187,8 +187,8 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
     ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = 
GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY 
[[GLOBAL_LOAD_DWORDX4_]]
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = 
V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit 
$mode, implicit $exec
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY 
[[GLOBAL_LOAD_DWORDX4_]]
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = 
V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, 
implicit $exec
     ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit 
$exec :: (store (s128), addrspace 1)
     ; CHECK-NEXT: SI_RETURN
     %0:vreg_64_align2 = COPY $vgpr4_vgpr5

``````````

</details>


https://github.com/llvm/llvm-project/pull/153023
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to