Module: Mesa
Branch: main
Commit: 6518d09601f6ab002defbcb5908d62f226d8f3cd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6518d09601f6ab002defbcb5908d62f226d8f3cd

Author: Rhys Perry <[email protected]>
Date:   Fri Sep 29 11:36:43 2023 +0100

aco: don't combine DPP into v_cmpx

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25471>

---

 src/amd/compiler/aco_insert_NOPs.cpp             | 12 ++----------
 src/amd/compiler/aco_ir.cpp                      |  4 ++++
 src/amd/compiler/aco_ir.h                        |  9 +++++++++
 src/amd/compiler/tests/test_optimizer_postRA.cpp | 22 ++++++++++++++++++++++
 4 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 049a6044f11..8cebae804f5 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -793,14 +793,6 @@ VALU_writes_sgpr(aco_ptr<Instruction>& instr)
    return false;
 }
 
-bool
-instr_writes_exec(const aco_ptr<Instruction>& instr)
-{
-   return std::any_of(instr->definitions.begin(), instr->definitions.end(),
-                      [](const Definition& def) -> bool
-                      { return def.physReg() == exec_lo || def.physReg() == exec_hi; });
-}
-
 bool
 instr_writes_sgpr(const aco_ptr<Instruction>& instr)
 {
@@ -915,7 +907,7 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
    if (!instr->isVALU() && instr->reads_exec()) {
       ctx.has_nonVALU_exec_read = true;
    } else if (instr->isVALU()) {
-      if (instr_writes_exec(instr)) {
+      if (instr->writes_exec()) {
          ctx.has_nonVALU_exec_read = false;
 
          /* Insert s_waitcnt_depctr instruction with magic imm to mitigate the problem */
@@ -1151,7 +1143,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta
                                             aco_ptr<Instruction>& instr)
 {
    if (instr->isSALU() && !instr->definitions.empty()) {
-      if (block_state.state == written_after_exec_write && instr_writes_exec(instr))
+      if (block_state.state == written_after_exec_write && instr->writes_exec())
          block_state.state = exec_written;
    } else if (instr->isVALU()) {
       bool vgpr_write = false;
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 55342c08438..99822481634 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -399,6 +399,10 @@ can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp
          return false;
    }
 
+   /* According to LLVM, it's unsafe to combine DPP into v_cmpx. */
+   if (instr->writes_exec())
+      return false;
+
    /* simpler than listing all VOP3P opcodes which do not support DPP */
    if (instr->isVOP3P()) {
       return instr->opcode == aco_opcode::v_fma_mix_f32 ||
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 6d9389eb0ee..808c6096c04 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1058,6 +1058,15 @@ struct Instruction {
       return false;
    }
 
+   constexpr bool writes_exec() const noexcept
+   {
+      for (const Definition& def : definitions) {
+         if (def.isFixed() && (def.physReg() == exec_lo || def.physReg() == exec_hi))
+            return true;
+      }
+      return false;
+   }
+
    Pseudo_instruction& pseudo() noexcept
    {
       assert(isPseudo());
diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp
index abc59165366..900993ad8c7 100644
--- a/src/amd/compiler/tests/test_optimizer_postRA.cpp
+++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp
@@ -513,6 +513,28 @@ BEGIN_TEST(optimizer_postRA.dpp_across_exec)
    }
 END_TEST
 
+BEGIN_TEST(optimizer_postRA.dpp_vcmpx)
+   //>> v1: %a:v[0], v1: %b:v[1] = p_startpgm
+   if (!setup_cs("v1 v1", GFX11))
+      return;
+
+   bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
+   bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
+
+   PhysReg reg_v2(258);
+   Operand a(inputs[0], PhysReg(256));
+   Operand b(inputs[1], PhysReg(257));
+
+   //! v1: %tmp0:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
+   //! s2: %res0:exec = v_cmpx_lt_f32 %tmp0:v[2], %b:v[1]
+   //! p_unit_test 0, %res0:exec
+   Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
+   Temp res0 = bld.vopc(aco_opcode::v_cmpx_lt_f32, bld.def(bld.lm, exec), Operand(tmp0, reg_v2), b);
+   writeout(0, Operand(res0, exec));
+
+   finish_optimizer_postRA_test();
+END_TEST
+
 BEGIN_TEST(optimizer_postRA.dpp_across_cf)
    //>> v1: %a:v[0], v1: %b:v[1], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1] = p_startpgm
    if (!setup_cs("v1 v1 v1 v1 s2", GFX10_3))

Reply via email to