Module: Mesa Branch: main Commit: ccb856fbaa1ae9d77e034f6822c9d004837932fa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ccb856fbaa1ae9d77e034f6822c9d004837932fa
Author: Marek Olšák <[email protected]> Date: Wed Jun 7 13:43:31 2023 -0400 radeonsi: determine si_pm4_state::reg_va_low_idx automatically The existing code doesn't work with the packed SET packets, so si_pm4_state needs to find reg_va_low_idx after the whole packet is built. Remove si_pm4_set_reg_va and do the same thing for SET_SH_REG. Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23517> --- src/gallium/drivers/radeonsi/si_pm4.c | 40 +++++++++++++++++++---- src/gallium/drivers/radeonsi/si_pm4.h | 1 - src/gallium/drivers/radeonsi/si_state_shaders.cpp | 26 +++++++-------- 3 files changed, 47 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index e297aade56d..effcf03ac5d 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -8,6 +8,7 @@ #include "si_build_pm4.h" #include "sid.h" #include "util/u_memory.h" +#include "ac_debug.h" static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val, unsigned opcode, unsigned idx); @@ -123,6 +124,25 @@ void si_pm4_finalize(struct si_pm4_state *state) state->ndw = state->last_pm4 + 2 + reg_count; state->last_opcode = PKT3_SET_SH_REG; } else { + /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */ + if (state->screen->debug_flags & DBG(SQTT) && + (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED || + state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N)) { + if (state->packed_is_padded) + reg_count++; /* Add this back because we only need to record the last write. */ + + for (int i = reg_count - 1; i >= 0; i--) { + unsigned reg_offset = SI_SH_REG_OFFSET + get_packed_reg_dw_offsetN(state, i) * 4; + + if (strstr(ac_get_register_name(state->screen->info.gfx_level, + state->screen->info.family, reg_offset), + "SPI_SHADER_PGM_LO_")) { + state->reg_va_low_idx = get_packed_reg_valueN_idx(state, i); + break; + } + } + } + /* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */ if (!state->is_compute_queue) state->pm4[state->last_pm4] |= PKT3_RESET_FILTER_CAM_S(1); @@ -132,7 +152,21 @@ void si_pm4_finalize(struct si_pm4_state *state) state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C; state->pm4[state->last_pm4] |= PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N); } + } + } + + if (state->screen->debug_flags & DBG(SQTT) && state->last_opcode == PKT3_SET_SH_REG) { + /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */ + unsigned reg_count = PKT_COUNT_G(state->pm4[state->last_pm4]); + unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 + 1] * 4; + for (unsigned i = 0; i < reg_count; i++) { + if (strstr(ac_get_register_name(state->screen->info.gfx_level, + state->screen->info.family, reg_base_offset + i * 4), + "SPI_SHADER_PGM_LO_")) { + state->reg_va_low_idx = state->last_pm4 + 2 + i; + break; + } } } } @@ -262,12 +296,6 @@ void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val) } } -void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val) -{ - si_pm4_set_reg(state, reg, val); - state->reg_va_low_idx = state->ndw - 1; -} - void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen, bool is_compute_queue) { diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 4f91d490687..482f5f2cbea 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -52,7 +52,6 @@ struct si_pm4_state { void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw); void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val); -void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val); void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val); void si_pm4_finalize(struct si_pm4_state *state); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 3cb663e2c2c..a4793b6ea1d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -685,7 +685,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) return; va = shader->bo->gpu_address; - si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); + si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); shader->config.rsrc1 = S_00B528_VGPRS(si_shader_encode_vgprs(shader)) | S_00B528_SGPRS(si_shader_encode_sgprs(shader)) | @@ -714,13 +714,13 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) S_00B404_CU_EN(0xffff), C_00B404_CU_EN, 16, &sscreen->info)); - si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); + si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); } else if (sscreen->info.gfx_level >= GFX10) { - si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); + si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); } else if (sscreen->info.gfx_level >= GFX9) { - si_pm4_set_reg_va(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); + si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); } else { - si_pm4_set_reg_va(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); + si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8)); } @@ -797,7 +797,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0; - si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); + si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES, @@ -1062,9 +1062,9 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) num_user_sgprs = GFX9_GS_NUM_USER_SGPR; if (sscreen->info.gfx_level >= GFX10) { - si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); + si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); } else { - si_pm4_set_reg_va(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); + si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); } uint32_t rsrc1 = S_00B228_VGPRS(si_shader_encode_vgprs(shader)) | @@ -1115,7 +1115,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &sscreen->info); - si_pm4_set_reg_va(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); + si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8)); @@ -1357,7 +1357,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader else gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */ - si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); + si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS(si_shader_encode_vgprs(shader)) | S_00B228_FLOAT_MODE(shader->config.float_mode) | @@ -1669,7 +1669,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64)); } - si_pm4_set_reg_va(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8); + si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8); si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8)); @@ -1943,7 +1943,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) } uint64_t va = shader->bo->gpu_address; - si_pm4_set_reg_va(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); + si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8)); @@ -2002,7 +2002,7 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader assert(0); } - assert(shader->pm4.reg_va_low_idx != 0); + assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.reg_va_low_idx != 0); } static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,
