https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/153023
Handle a special case for copies from AGPR to VGPR on the MFMA inputs. If the "input" is really a subregister def, we will not see the usual copy to VGPR for src2, only the read of the subregister def. Not sure if this pattern appears in practice. From ed69c54e96c54e2d94016a6f843e27f6fa9061cd Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 11 Aug 2025 18:22:09 +0900 Subject: [PATCH] AMDGPU: Handle V->A MFMA copy from case with immediate src2 Handle a special case for copies from AGPR to VGPR on the MFMA inputs. If the "input" is really a subregister def, we will not see the usual copy to VGPR for src2, only the read of the subregister def. Not sure if this pattern appears in practice. --- llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp | 11 ++++++----- .../AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp index b71c70db5e6b3..4e0d64a20690e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp @@ -375,13 +375,14 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR( Register CopyDstReg = UseMI.getOperand(0).getReg(); if (!CopyDstReg.isVirtual()) continue; + for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) { + if (!CopyUseMO.readsReg()) + continue; - for (MachineInstr &CopyUseMI : MRI.use_instructions(CopyDstReg)) { + MachineInstr &CopyUseMI = *CopyUseMO.getParent(); if (isRewriteCandidate(CopyUseMI)) { - const MachineOperand *Op = - CopyUseMI.findRegisterUseOperand(CopyDstReg, /*TRI=*/nullptr); - if (tryReassigningMFMAChain(CopyUseMI, Op->getOperandNo(), - VRM.getPhys(Op->getReg()))) + if (tryReassigningMFMAChain(CopyUseMI, CopyUseMO.getOperandNo(), + VRM.getPhys(CopyUseMO.getReg()))) MadeChange = true; } } diff --git 
a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir index 632401b6128c5..17a72110767bb 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir @@ -187,8 +187,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:vreg_128_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]].sub0_sub1:areg_128_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], 0, 0, 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY3]], 0, 0, implicit $exec :: (store (s128), addrspace 1) ; CHECK-NEXT: SI_RETURN %0:vreg_64_align2 = COPY $vgpr4_vgpr5 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits