https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/140587
>From 82fd0c72c28b7bb3694d58fe4b9b885ec320d914 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 19 May 2025 20:02:54 +0200 Subject: [PATCH 1/2] AMDGPU: Remove redundant operand folding checks This was pre-filtering out a specific situation from being added to the fold candidate list. The operand legality will ultimately be checked with isOperandLegal before the fold is performed, so I don't see the plus in pre-filtering this one case. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 18 ---- .../AMDGPU/fold-operands-frame-index.mir | 101 ++++++++++++++++++ 2 files changed, 101 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index b62230d4dc28c..9bbc8e75fd31b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -778,24 +778,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList( return true; } - // Check the case where we might introduce a second constant operand to a - // scalar instruction - if (TII->isSALU(MI->getOpcode())) { - const MCInstrDesc &InstDesc = MI->getDesc(); - const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo]; - - // Fine if the operand can be encoded as an inline constant - if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) { - // Otherwise check for another constant - for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) { - auto &Op = MI->getOperand(i); - if (OpNo != i && !Op.isReg() && - !TII->isInlineConstant(Op, InstDesc.operands()[i])) - return false; - } - } - } - appendFoldCandidate(FoldList, MI, OpNo, OpToFold); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir index 4417f205646ee..7fad2f466bc9f 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir @@ -573,3 +573,104 @@ body: | S_ENDPGM 0, implicit %2 ... + +--- +name: no_fold_multiple_fi_s_cselect_b32 +tracksRegLiveness: true +stack: + - { id: 0, size: 64, alignment: 4 } + - { id: 1, size: 32, alignment: 4 } +body: | + bb.0: + ; CHECK-LABEL: name: no_fold_multiple_fi_s_cselect_b32 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1 + ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_MOV_B32_]], %stack.0, implicit undef $scc + ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]] + %0:sreg_32 = S_MOV_B32 %stack.0 + %1:sreg_32 = S_MOV_B32 %stack.1 + %2:sreg_32 = S_CSELECT_B32 killed %1, killed %0, implicit undef $scc + S_ENDPGM 0, implicit %2 + +... + +--- +name: no_fold_multiple_fi_v_cndmask_b32_e64 +tracksRegLiveness: true +stack: + - { id: 0, size: 64, alignment: 4 } + - { id: 1, size: 32, alignment: 4 } +body: | + bb.0: + liveins: $sgpr8_sgpr9 + ; GFX9-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64 + ; GFX9: liveins: $sgpr8_sgpr9 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9 + ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec + ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[V_MOV_B32_e32_]], 0, killed [[V_MOV_B32_e32_1]], [[COPY]], implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; + ; GFX10-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64 + ; GFX10: liveins: $sgpr8_sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9 + ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec + ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, %stack.0, 0, killed [[V_MOV_B32_e32_]], [[COPY]], implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + ; + ; GFX12-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64 + ; GFX12: liveins: $sgpr8_sgpr9 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9 + ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec + ; GFX12-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, %stack.0, 0, killed [[V_MOV_B32_e32_]], [[COPY]], implicit $exec + ; GFX12-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + %0:sreg_64_xexec = COPY $sgpr8_sgpr9 + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec + %3:vgpr_32 = V_CNDMASK_B32_e64 0, killed %1, 0, killed %2, %0, implicit $exec + S_ENDPGM 0, implicit %3 + +... + +--- +name: no_fold_multiple_fi_v_cndmask_b32_e32 +tracksRegLiveness: true +stack: + - { id: 0, size: 64, alignment: 4 } + - { id: 1, size: 32, alignment: 4 } +body: | + bb.0: + liveins: $sgpr8_sgpr9 + ; GFX9-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32 + ; GFX9: liveins: $sgpr8_sgpr9 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: $vcc = COPY $sgpr8_sgpr9 + ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec + ; GFX9-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 killed [[V_MOV_B32_e32_]], killed [[V_MOV_B32_e32_1]], implicit $vcc, implicit $exec + ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]] + ; + ; GFX10-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32 + ; GFX10: liveins: $sgpr8_sgpr9 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $vcc = COPY $sgpr8_sgpr9 + ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec + ; GFX10-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 %stack.0, killed [[V_MOV_B32_e32_]], implicit $vcc, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]] + ; + ; GFX12-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32 + ; GFX12: liveins: $sgpr8_sgpr9 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: $vcc = COPY $sgpr8_sgpr9 + ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec + ; GFX12-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 %stack.0, killed [[V_MOV_B32_e32_]], implicit $vcc, implicit $exec + ; GFX12-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]] + $vcc = COPY $sgpr8_sgpr9 + %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %2:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec + %3:vgpr_32 = V_CNDMASK_B32_e32 killed %1, killed %2, implicit $vcc, implicit $exec + S_ENDPGM 0, implicit %3 + +... >From a372f44f105ffde627409890c1a6e8d6f3696ca6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Tue, 20 May 2025 21:28:58 +0200 Subject: [PATCH 2/2] no multiple fi folding --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 85276bd24bcf4..6ce1a0ce34532 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6063,6 +6063,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, !isInlineConstant(Op, InstDesc.operands()[i]) && !Op.isIdenticalTo(*MO)) return false; + + // Do not fold a frame index into an instruction that already has a frame + // index. The frame index handling code doesn't handle fixing up operand + // constraints if there are multiple indexes. + if (Op.isFI() && MO->isFI()) + return false; } } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() && isF16PseudoScalarTrans(MI.getOpcode())) { _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits