llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> It is not safe to use isOperandLegal on an instruction that does not have a complete set of operands. Unforunately the APIs are not set up in a convenient way to speculatively check if an instruction will be legal in a hypothetical instruction. Build all the operands and then verify they are legal after. This is clumsy, we should have a more direct check for will these operands give a legal instruction. This seems to fix a missed optimization in the gfx11 test. The fold was firing for gfx1150, but not gfx1100. Both should support vop3 literals so I'm not sure why it wasn't working before. --- Full diff: https://github.com/llvm/llvm-project/pull/155595.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp (+19-16) - (modified) llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir (+7-11) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 184929a5a50f6..3831aacc2a639 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -250,7 +250,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, ++NumOperands; } if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) { - if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) { + if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::sdst)) { DPPInst.add(*SDst); ++NumOperands; } @@ -296,11 +296,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); assert(Src0); int Src0Idx = NumOperands; - if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) { - LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n"); - Fail = true; - break; - } + DPPInst.add(*Src0); DPPInst->getOperand(NumOperands).setIsKill(false); ++NumOperands; @@ -319,7 +315,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, } auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1); if (Src1) { - int OpNum = NumOperands; + assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1)); // If subtarget does not support SGPRs for src1 operand then the // requirements are the same as for src0. We check src0 instead because // pseudos are shared between subtargets and allow SGPR for src1 on all. @@ -327,13 +323,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, assert(getOperandSize(*DPPInst, Src0Idx, *MRI) == getOperandSize(*DPPInst, NumOperands, *MRI) && "Src0 and Src1 operands should have the same size"); - OpNum = Src0Idx; - } - if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) { - LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n"); - Fail = true; - break; } + DPPInst.add(*Src1); ++NumOperands; } @@ -349,9 +340,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, } auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2); if (Src2) { - if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) || - !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) { - LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n"); + if (!AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src2)) { + LLVM_DEBUG(dbgs() << " failed: dpp does not have src2\n"); Fail = true; break; } @@ -431,6 +421,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask)); DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask)); DPPInst.addImm(CombBCZ ? 1 : 0); + + for (AMDGPU::OpName Op : + {AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2}) { + int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, Op); + if (OpIdx == -1) + break; + + if (!TII->isOperandLegal(*DPPInst, OpIdx)) { + LLVM_DEBUG(dbgs() << " failed: src operand is illegal\n"); + Fail = true; + break; + } + } } while (false); if (Fail) { diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir index fb20e72a77103..3725384e885ee 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir @@ -1,6 +1,6 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN +# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN --- @@ -8,8 +8,7 @@ # GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec # GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec # GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec -# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec -# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec +# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec name: vop3 tracksRegLiveness: true body: | @@ -39,12 +38,9 @@ body: | # GCN-LABEL: name: vop3_sgpr_src1 # GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec -# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec -# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec -# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec -# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec -# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec -# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec +# GCN: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec +# GCN: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec +# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec # GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec name: vop3_sgpr_src1 tracksRegLiveness: true `````````` </details> https://github.com/llvm/llvm-project/pull/155595 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits