Module: Mesa Branch: main Commit: 6518d09601f6ab002defbcb5908d62f226d8f3cd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6518d09601f6ab002defbcb5908d62f226d8f3cd
Author: Rhys Perry <[email protected]> Date: Fri Sep 29 11:36:43 2023 +0100 aco: don't combine DPP into v_cmpx Signed-off-by: Rhys Perry <[email protected]> Reviewed-by: Georg Lehmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25471> --- src/amd/compiler/aco_insert_NOPs.cpp | 12 ++---------- src/amd/compiler/aco_ir.cpp | 4 ++++ src/amd/compiler/aco_ir.h | 9 +++++++++ src/amd/compiler/tests/test_optimizer_postRA.cpp | 22 ++++++++++++++++++++++ 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 049a6044f11..8cebae804f5 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -793,14 +793,6 @@ VALU_writes_sgpr(aco_ptr<Instruction>& instr) return false; } -bool -instr_writes_exec(const aco_ptr<Instruction>& instr) -{ - return std::any_of(instr->definitions.begin(), instr->definitions.end(), - [](const Definition& def) -> bool - { return def.physReg() == exec_lo || def.physReg() == exec_hi; }); -} - bool instr_writes_sgpr(const aco_ptr<Instruction>& instr) { @@ -915,7 +907,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>& if (!instr->isVALU() && instr->reads_exec()) { ctx.has_nonVALU_exec_read = true; } else if (instr->isVALU()) { - if (instr_writes_exec(instr)) { + if (instr->writes_exec()) { ctx.has_nonVALU_exec_read = false; /* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */ @@ -1151,7 +1143,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta aco_ptr<Instruction>& instr) { if (instr->isSALU() && !instr->definitions.empty()) { - if (block_state.state == written_after_exec_write && instr_writes_exec(instr)) + if (block_state.state == written_after_exec_write && instr->writes_exec()) block_state.state = exec_written; } else if (instr->isVALU()) { bool vgpr_write = false; diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 55342c08438..99822481634 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -399,6 +399,10 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp return false; } + /* According to LLVM, it's unsafe to combine DPP into v_cmpx. */ + if (instr->writes_exec()) + return false; + /* simpler than listing all VOP3P opcodes which do not support DPP */ if (instr->isVOP3P()) { return instr->opcode == aco_opcode::v_fma_mix_f32 || diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 6d9389eb0ee..808c6096c04 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1058,6 +1058,15 @@ struct Instruction { return false; } + constexpr bool writes_exec() const noexcept + { + for (const Definition& def : definitions) { + if (def.isFixed() && (def.physReg() == exec_lo || def.physReg() == exec_hi)) + return true; + } + return false; + } + Pseudo_instruction& pseudo() noexcept { assert(isPseudo()); diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp index abc59165366..900993ad8c7 100644 --- a/src/amd/compiler/tests/test_optimizer_postRA.cpp +++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp @@ -513,6 +513,28 @@ BEGIN_TEST(optimizer_postRA.dpp_across_exec) } END_TEST +BEGIN_TEST(optimizer_postRA.dpp_vcmpx) + //>> v1: %a:v[0], v1: %b:v[1] = p_startpgm + if (!setup_cs("v1 v1", GFX11)) + return; + + bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256)); + bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257)); + + PhysReg reg_v2(258); + Operand a(inputs[0], PhysReg(256)); + Operand b(inputs[1], PhysReg(257)); + + //! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 + //! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1] + //! p_unit_test 0, %res0:exec + Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); + Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b); + writeout(0, Operand(res0, exec)); + + finish_optimizer_postRA_test(); +END_TEST + BEGIN_TEST(optimizer_postRA.dpp_across_cf) //>> v1: %a:v[0], v1: %b:v[1], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1] = p_startpgm if (!setup_cs("v1 v1 v1 v1 s2", GFX10_3))
