Module: Mesa
Branch: main
Commit: a73f76750bf0580a2bd4a42ffc427fba4e8a0ccb
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a73f76750bf0580a2bd4a42ffc427fba4e8a0ccb

Author: Rhys Perry <[email protected]>
Date:   Fri Sep 22 20:01:55 2023 +0100

aco: fix LdsDirectVMEMHazard WaW with the wrong waitcnt

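Seems we missed this case: VGPRs written by a VMEM or FLAT-like
instruction (its definitions) were recorded in vgpr_used_by_vmem_store
instead of vgpr_used_by_vmem_load.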

fossil-db (navi31):
Totals from 24 (0.03% of 79332) affected shaders:
Instrs: 3562 -> 3538 (-0.67%)
CodeSize: 18740 -> 18644 (-0.51%)

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Fixes: 2cdb3e4b6b6d ("aco: add VMEMtoScalarWriteHazard tests")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25374>

---

 src/amd/compiler/aco_insert_NOPs.cpp        |  2 +-
 src/amd/compiler/tests/test_insert_nops.cpp | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_insert_NOPs.cpp 
b/src/amd/compiler/aco_insert_NOPs.cpp
index 8cebae804f5..fd4f5acbc62 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -1396,7 +1396,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& 
ctx, aco_ptr<Instruction>&
     */
    if (instr->isVMEM() || instr->isFlatLike()) {
       for (Definition& def : instr->definitions)
-         fill_vgpr_bitset(ctx.vgpr_used_by_vmem_store, def.physReg(), 
def.bytes());
+         fill_vgpr_bitset(ctx.vgpr_used_by_vmem_load, def.physReg(), 
def.bytes());
       if (instr->definitions.empty()) {
          for (Operand& op : instr->operands)
             fill_vgpr_bitset(ctx.vgpr_used_by_vmem_store, op.physReg(), 
op.bytes());
diff --git a/src/amd/compiler/tests/test_insert_nops.cpp 
b/src/amd/compiler/tests/test_insert_nops.cpp
index 1658e8d653f..ffa0cf9daa0 100644
--- a/src/amd/compiler/tests/test_insert_nops.cpp
+++ b/src/amd/compiler/tests/test_insert_nops.cpp
@@ -577,6 +577,16 @@ BEGIN_TEST(insert_nops.lds_direct_vmem)
    bld.sopp(aco_opcode::s_waitcnt, -1, 0x3ff);
    bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), 
Operand(m0, s1));
 
+   //! p_unit_test 14
+   //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[1], 0 offen
+   //! s1: %0:null = s_waitcnt_vscnt imm:0
+   //! s_waitcnt_depctr vm_vsrc(0)
+   //! v1: %0:v[0] = lds_direct_load %0:m0
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14));
+   create_mubuf(0, PhysReg(256), PhysReg(257));
+   bld.sopk(aco_opcode::s_waitcnt_vscnt, Definition(sgpr_null, s1), 0);
+   bld.ldsdir(aco_opcode::lds_direct_load, Definition(PhysReg(256), v1), 
Operand(m0, s1));
+
    finish_insert_nops_test();
 END_TEST
 

Reply via email to