Module: Mesa
Branch: main
Commit: 576afa85405a9796c2eb9c88cdfa28b441033009
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=576afa85405a9796c2eb9c88cdfa28b441033009

Author: Georg Lehmann <dadschoo...@gmail.com>
Date:   Fri Nov 24 15:45:44 2023 +0100

aco: don't optimize DPP across more than one block

Register write tracking doesn't work for inactive lanes, so this was unsafe.

Foz-DB Navi31:
Totals from 8 (0.01% of 78196) affected shaders:
Instrs: 11513 -> 11515 (+0.02%)
CodeSize: 61056 -> 61064 (+0.01%)

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10197
Reviewed-by: Daniel Schürmann <dan...@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26373>

---

 src/amd/compiler/aco_optimizer_postRA.cpp        |  7 +++++++
 src/amd/compiler/tests/test_optimizer_postRA.cpp | 12 ++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index 48ada196926..5978e7c4b46 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -492,6 +492,13 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
       if (!op_instr_idx.found())
          continue;
 
+      /* is_overwritten_since only considers active lanes when the register could possibly
+       * have been overwritten from inactive lanes. Restrict this optimization to at most
+       * one block so that there is no possibility for clobbered inactive lanes.
+       */
+      if (ctx.current_block->index - op_instr_idx.block > 1)
+         continue;
+
       const Instruction* mov = ctx.get(op_instr_idx);
       if (mov->opcode != aco_opcode::v_mov_b32 || !mov->isDPP())
          continue;
diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp
index 811e762399b..c0cb4fc8a2f 100644
--- a/src/amd/compiler/tests/test_optimizer_postRA.cpp
+++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp
@@ -571,6 +571,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
          //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
          bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
 
+         //! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
+         //! p_unit_test 10, %res10:v[12]
+         Temp result =
+            bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
+         writeout(10, Operand(result, reg_v12));
+
          //! p_logical_end
          //! s2: %0:vcc = p_branch BB3
 
@@ -605,12 +611,6 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
    //! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
    //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
 
-   //! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
-   //! p_unit_test 10, %res10:v[12]
-   Temp result =
-      bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
-   writeout(10, Operand(result, reg_v12));
-
    finish_optimizer_postRA_test();
 END_TEST
 

Reply via email to