Module: Mesa Branch: main Commit: b55a2065e03e0f033217b1b58a0c18e3a5e86136 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b55a2065e03e0f033217b1b58a0c18e3a5e86136
Author: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com> Date: Tue Nov 7 16:05:52 2023 +0100 radeonsi/sqtt: rework pm4.reg_va_low_idx The initial logic was to remember the place were SPI_SHADER_PGM_LO_* are written, then assume that we can get the register offset because the sequence would always be: PKT3_SET_SH_REG SPI_SHADER_PGM_LO_* register offset VA low 32 bits value <- reg_va_low_idx The problem is that this sequence isn't guaranteed, for instance we can get this instead: 0 c0067600 | 1 00000046 | 2 003ffffd | SPI_SHADER_PGM_RSRC3_VS 3 00000020 | SPI_SHADER_LATE_ALLOC_VS 4 * 00002080 | SPI_SHADER_PGM_LO_VS 5 00000080 | SPI_SHADER_PGM_HI_VS So the assert in si_state_draw.cpp would fail as well as the VA update logic. So instead remember which the SPI_SHADER_PGM_LO_* offset, and the low 32 bits of the VA in si_update_shaders. Fixes: 8034a71430b ("radeonsi/sqtt: re-export shaders in a single bo") Reviewed-by: Marek Olšák <marek.ol...@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26774> --- src/gallium/drivers/radeonsi/si_pm4.c | 5 +++-- src/gallium/drivers/radeonsi/si_pm4.h | 2 +- src/gallium/drivers/radeonsi/si_state_draw.cpp | 5 ++--- src/gallium/drivers/radeonsi/si_state_shaders.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index a53904e4920..488ff62bc50 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -135,7 +135,7 @@ void si_pm4_finalize(struct si_pm4_state *state) if (strstr(ac_get_register_name(state->screen->info.gfx_level, state->screen->info.family, reg_offset), "SPI_SHADER_PGM_LO_")) { - state->reg_va_low_idx = get_packed_reg_valueN_idx(state, i); + state->spi_shader_pgm_lo_reg = reg_offset; break; } } @@ -158,7 +158,8 @@ void si_pm4_finalize(struct si_pm4_state *state) if (strstr(ac_get_register_name(state->screen->info.gfx_level, state->screen->info.family, reg_base_offset + i * 4), "SPI_SHADER_PGM_LO_")) { - state->reg_va_low_idx = state->last_pm4 + 2 + i; + state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4; + break; } } diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 39f72ed4167..56a6b654d3a 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -45,7 +45,7 @@ struct si_pm4_state { uint16_t max_dw; /* Used by SQTT to override the shader address */ - uint16_t reg_va_low_idx; + uint32_t spi_shader_pgm_lo_reg; /* This must be the last field because the array can continue after the structure. */ uint32_t pm4[64]; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index bba9aae96ce..756ddf9199a 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -370,9 +370,8 @@ static bool si_update_shaders(struct si_context *sctx) struct si_pm4_state *pm4 = &shader->pm4; - uint32_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8; - assert(PKT3_IT_OPCODE_G(pm4->pm4[pm4->reg_va_low_idx - 2]) == PKT3_SET_SH_REG); - uint32_t reg = (pm4->pm4[pm4->reg_va_low_idx - 1] << 2) + SI_SH_REG_OFFSET; + uint64_t va_low = (pipeline->bo->gpu_address + pipeline->offset[i]) >> 8; + uint32_t reg = pm4->spi_shader_pgm_lo_reg; si_pm4_set_reg(&pipeline->pm4, reg, va_low); } } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index ef0b4a501d4..a10a4373e67 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -2096,7 +2096,7 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader assert(0); } - assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.reg_va_low_idx != 0); + assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.spi_shader_pgm_lo_reg != 0); } static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,