Module: Mesa
Branch: main
Commit: ccb856fbaa1ae9d77e034f6822c9d004837932fa
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ccb856fbaa1ae9d77e034f6822c9d004837932fa

Author: Marek Olšák <[email protected]>
Date:   Wed Jun  7 13:43:31 2023 -0400

radeonsi: determine si_pm4_state::reg_va_low_idx automatically

The existing code doesn't work with the packed SET packets, so si_pm4_state
needs to find reg_va_low_idx after the whole packet is built.

Remove si_pm4_set_reg_va and do the same thing for SET_SH_REG.

Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23517>

---

 src/gallium/drivers/radeonsi/si_pm4.c             | 40 +++++++++++++++++++----
 src/gallium/drivers/radeonsi/si_pm4.h             |  1 -
 src/gallium/drivers/radeonsi/si_state_shaders.cpp | 26 +++++++--------
 3 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index e297aade56d..effcf03ac5d 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -8,6 +8,7 @@
 #include "si_build_pm4.h"
 #include "sid.h"
 #include "util/u_memory.h"
+#include "ac_debug.h"
 
 static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, 
uint32_t val,
                                   unsigned opcode, unsigned idx);
@@ -123,6 +124,25 @@ void si_pm4_finalize(struct si_pm4_state *state)
          state->ndw = state->last_pm4 + 2 + reg_count;
          state->last_opcode = PKT3_SET_SH_REG;
       } else {
+         /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */
+         if (state->screen->debug_flags & DBG(SQTT) &&
+             (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
+              state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N)) {
+            if (state->packed_is_padded)
+               reg_count++; /* Add this back because we only need to record 
the last write. */
+
+            for (int i = reg_count - 1; i >= 0; i--) {
+               unsigned reg_offset = SI_SH_REG_OFFSET + 
get_packed_reg_dw_offsetN(state, i) * 4;
+
+               if (strstr(ac_get_register_name(state->screen->info.gfx_level,
+                                               state->screen->info.family, 
reg_offset),
+                          "SPI_SHADER_PGM_LO_")) {
+                  state->reg_va_low_idx = get_packed_reg_valueN_idx(state, i);
+                  break;
+               }
+            }
+         }
+
          /* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */
          if (!state->is_compute_queue)
             state->pm4[state->last_pm4] |= PKT3_RESET_FILTER_CAM_S(1);
@@ -132,7 +152,21 @@ void si_pm4_finalize(struct si_pm4_state *state)
             state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C;
             state->pm4[state->last_pm4] |= 
PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N);
          }
+      }
+   }
+
+   if (state->screen->debug_flags & DBG(SQTT) && state->last_opcode == 
PKT3_SET_SH_REG) {
+      /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */
+      unsigned reg_count = PKT_COUNT_G(state->pm4[state->last_pm4]);
+      unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 
+ 1] * 4;
 
+      for (unsigned i = 0; i < reg_count; i++) {
+         if (strstr(ac_get_register_name(state->screen->info.gfx_level,
+                                         state->screen->info.family, 
reg_base_offset + i * 4),
+                    "SPI_SHADER_PGM_LO_")) {
+            state->reg_va_low_idx = state->last_pm4 + 2 + i;
+            break;
+         }
       }
    }
 }
@@ -262,12 +296,6 @@ void si_pm4_set_reg_idx3(struct si_pm4_state *state, 
unsigned reg, uint32_t val)
    }
 }
 
-void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val)
-{
-   si_pm4_set_reg(state, reg, val);
-   state->reg_va_low_idx = state->ndw - 1;
-}
-
 void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
                         bool is_compute_queue)
 {
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 4f91d490687..482f5f2cbea 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -52,7 +52,6 @@ struct si_pm4_state {
 
 void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
 void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
-void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val);
 void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t 
val);
 void si_pm4_finalize(struct si_pm4_state *state);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 3cb663e2c2c..a4793b6ea1d 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -685,7 +685,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct 
si_shader *shader)
       return;
 
    va = shader->bo->gpu_address;
-   si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
+   si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
 
    shader->config.rsrc1 = S_00B528_VGPRS(si_shader_encode_vgprs(shader)) |
                           S_00B528_SGPRS(si_shader_encode_sgprs(shader)) |
@@ -714,13 +714,13 @@ static void si_shader_hs(struct si_screen *sscreen, 
struct si_shader *shader)
                                          S_00B404_CU_EN(0xffff),
                                          C_00B404_CU_EN, 16, &sscreen->info));
 
-      si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
+      si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
    } else if (sscreen->info.gfx_level >= GFX10) {
-      si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
+      si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
    } else if (sscreen->info.gfx_level >= GFX9) {
-      si_pm4_set_reg_va(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
+      si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
    } else {
-      si_pm4_set_reg_va(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
+      si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
       si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS,
                      S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8));
    }
@@ -797,7 +797,7 @@ static void si_shader_es(struct si_screen *sscreen, struct 
si_shader *shader)
 
    oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
 
-   si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+   si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
    si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
                   S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));
    si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
@@ -1062,9 +1062,9 @@ static void si_shader_gs(struct si_screen *sscreen, 
struct si_shader *shader)
          num_user_sgprs = GFX9_GS_NUM_USER_SGPR;
 
       if (sscreen->info.gfx_level >= GFX10) {
-         si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+         si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
       } else {
-         si_pm4_set_reg_va(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
+         si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
       }
 
       uint32_t rsrc1 = S_00B228_VGPRS(si_shader_encode_vgprs(shader)) |
@@ -1115,7 +1115,7 @@ static void si_shader_gs(struct si_screen *sscreen, 
struct si_shader *shader)
                         S_00B21C_WAVE_LIMIT(0x3F),
                         C_00B21C_CU_EN, 0, &sscreen->info);
 
-      si_pm4_set_reg_va(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
+      si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
       si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS,
                      S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8));
 
@@ -1357,7 +1357,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, 
struct si_shader *shader
    else
       gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
 
-   si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+   si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
    si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
                   S_00B228_VGPRS(si_shader_encode_vgprs(shader)) |
                   S_00B228_FLOAT_MODE(shader->config.float_mode) |
@@ -1669,7 +1669,7 @@ static void si_shader_vs(struct si_screen *sscreen, 
struct si_shader *shader,
       si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, 
S_00B11C_LIMIT(late_alloc_wave64));
    }
 
-   si_pm4_set_reg_va(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
+   si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
    si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS,
                   S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8));
 
@@ -1943,7 +1943,7 @@ static void si_shader_ps(struct si_screen *sscreen, 
struct si_shader *shader)
    }
 
    uint64_t va = shader->bo->gpu_address;
-   si_pm4_set_reg_va(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
+   si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
    si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS,
                   S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8));
 
@@ -2002,7 +2002,7 @@ static void si_shader_init_pm4_state(struct si_screen 
*sscreen, struct si_shader
       assert(0);
    }
 
-   assert(shader->pm4.reg_va_low_idx != 0);
+   assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.reg_va_low_idx != 
0);
 }
 
 static void si_clear_vs_key_inputs(struct si_context *sctx, union 
si_shader_key *key,

Reply via email to