Module: Mesa
Branch: main
Commit: ab3184c0a2da96c87854cb1f6f80ffdf41c7cd98
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ab3184c0a2da96c87854cb1f6f80ffdf41c7cd98

Author: Rhys Perry <[email protected]>
Date:   Wed Feb  8 16:37:44 2023 +0000

aco: don't apply modifiers through DPP to unsupported instructions

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21201>

---

 src/amd/compiler/aco_optimizer.cpp               | 69 ++++++++++++++----------
 src/amd/compiler/aco_optimizer_postRA.cpp        |  5 ++
 src/amd/compiler/tests/test_optimizer.cpp        | 16 ++++++
 src/amd/compiler/tests/test_optimizer_postRA.cpp | 16 ++++++
 4 files changed, 77 insertions(+), 29 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 4557fd0ff47..2043990fc79 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -4880,38 +4880,49 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
             continue;
          ssa_info info = ctx.info[instr->operands[i].tempId()];
 
+         if (!info.is_dpp() || info.instr->pass_flags != instr->pass_flags)
+            continue;
+
          aco_opcode swapped_op;
-         if (info.is_dpp() && info.instr->pass_flags == instr->pass_flags &&
-             (i == 0 || can_swap_operands(instr, &swapped_op)) &&
-             can_use_DPP(instr, true, info.is_dpp8()) && !instr->isDPP()) {
-            bool dpp8 = info.is_dpp8();
-            convert_to_DPP(instr, dpp8);
-            if (dpp8) {
-               DPP8_instruction* dpp = &instr->dpp8();
-               for (unsigned j = 0; j < 8; ++j)
-                  dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
-               if (i) {
-                  instr->opcode = swapped_op;
-                  std::swap(instr->operands[0], instr->operands[1]);
-               }
-            } else {
-               DPP16_instruction* dpp = &instr->dpp16();
-               if (i) {
-                  instr->opcode = swapped_op;
-                  std::swap(instr->operands[0], instr->operands[1]);
-                  std::swap(dpp->neg[0], dpp->neg[1]);
-                  std::swap(dpp->abs[0], dpp->abs[1]);
-               }
-               dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
-               dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
-               dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
-               dpp->abs[0] |= info.instr->dpp16().abs[0];
+         if (i != 0 && !can_swap_operands(instr, &swapped_op))
+            continue;
+
+         if (instr->isDPP() || !can_use_DPP(instr, true, info.is_dpp8()))
+            continue;
+
+         bool dpp8 = info.is_dpp8();
+         bool input_mods = 
instr_info.can_use_input_modifiers[(int)instr->opcode] &&
+                           instr_info.operand_size[(int)instr->opcode] == 32;
+         if (!dpp8 && (info.instr->dpp16().neg[0] || 
info.instr->dpp16().abs[0]) && !input_mods)
+            continue;
+
+         convert_to_DPP(instr, dpp8);
+         if (dpp8) {
+            DPP8_instruction* dpp = &instr->dpp8();
+            for (unsigned j = 0; j < 8; ++j)
+               dpp->lane_sel[j] = info.instr->dpp8().lane_sel[j];
+            if (i) {
+               instr->opcode = swapped_op;
+               std::swap(instr->operands[0], instr->operands[1]);
             }
-            if (--ctx.uses[info.instr->definitions[0].tempId()])
-               ctx.uses[info.instr->operands[0].tempId()]++;
-            instr->operands[0].setTemp(info.instr->operands[0].getTemp());
-            break;
+         } else {
+            DPP16_instruction* dpp = &instr->dpp16();
+            if (i) {
+               instr->opcode = swapped_op;
+               std::swap(instr->operands[0], instr->operands[1]);
+               std::swap(dpp->neg[0], dpp->neg[1]);
+               std::swap(dpp->abs[0], dpp->abs[1]);
+            }
+            dpp->dpp_ctrl = info.instr->dpp16().dpp_ctrl;
+            dpp->bound_ctrl = info.instr->dpp16().bound_ctrl;
+            dpp->neg[0] ^= info.instr->dpp16().neg[0] && !dpp->abs[0];
+            dpp->abs[0] |= info.instr->dpp16().abs[0];
          }
+
+         if (--ctx.uses[info.instr->definitions[0].tempId()])
+            ctx.uses[info.instr->operands[0].tempId()]++;
+         instr->operands[0].setTemp(info.instr->operands[0].getTemp());
+         break;
       }
    }
 
diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index 91fc663927c..510b9e196da 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -511,6 +511,11 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
       if (i && !can_swap_operands(instr, &instr->opcode))
          continue;
 
+      bool input_mods = instr_info.can_use_input_modifiers[(int)instr->opcode] 
&&
+                        instr_info.operand_size[(int)instr->opcode] == 32;
+      if (!dpp8 && (mov->dpp16().neg[0] || mov->dpp16().abs[0]) && !input_mods)
+         continue;
+
       if (!dpp8) /* anything else doesn't make sense in SSA */
          assert(mov->dpp16().row_mask == 0xf && mov->dpp16().bank_mask == 0xf);
 
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index e501fd076bd..043602d8626 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -1064,6 +1064,22 @@ BEGIN_TEST(optimizer.dpp)
    res7->vop3().abs[0] = true;
    writeout(7, res7);
 
+   //! v1: %tmp11 = v_mov_b32 -%a row_mirror bound_ctrl:1
+   //! v1: %res11 = v_add_u32 %tmp11, %b
+   //! p_unit_test 11, %res11
+   auto tmp11 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, 
dpp_row_mirror);
+   tmp11->dpp16().neg[0] = true;
+   Temp res11 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), tmp11, b);
+   writeout(11, res11);
+
+   //! v1: %tmp12 = v_mov_b32 -%a row_mirror bound_ctrl:1
+   //! v1: %res12 = v_add_f16 %tmp12, %b
+   //! p_unit_test 12, %res12
+   auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, 
dpp_row_mirror);
+   tmp12->dpp16().neg[0] = true;
+   Temp res12 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1), tmp12, b);
+   writeout(12, res12);
+
    /* vcc */
    //! v1: %res8 = v_cndmask_b32 %a, %b, %c:vcc row_mirror bound_ctrl:1
    //! p_unit_test 8, %res8
diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp
index c5f0a3bf701..066f74f7510 100644
--- a/src/amd/compiler/tests/test_optimizer_postRA.cpp
+++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp
@@ -409,6 +409,22 @@ BEGIN_TEST(optimizer_postRA.dpp)
    res7->vop3().abs[0] = true;
    writeout(7, Operand(res7, reg_v2));
 
+   //! v1: %tmp12:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
+   //! v1: %res12:v[2] = v_add_u32 %tmp12:v[2], %b:v[1]
+   //! p_unit_test 12, %res12:v[2]
+   auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, 
dpp_row_mirror);
+   tmp12->dpp16().neg[0] = true;
+   Temp res12 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1, reg_v2), 
Operand(tmp12, reg_v2), b);
+   writeout(12, Operand(res12, reg_v2));
+
+   //! v1: %tmp13:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
+   //! v1: %res13:v[2] = v_add_f16 %tmp13:v[2], %b:v[1]
+   //! p_unit_test 13, %res13:v[2]
+   auto tmp13 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, 
dpp_row_mirror);
+   tmp13->dpp16().neg[0] = true;
+   Temp res13 = bld.vop2(aco_opcode::v_add_f16, bld.def(v1, reg_v2), 
Operand(tmp13, reg_v2), b);
+   writeout(13, Operand(res13, reg_v2));
+
    /* vcc */
    //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror 
bound_ctrl:1
    //! p_unit_test 8, %res8:v[2]

Reply via email to