Module: Mesa
Branch: main
Commit: d04ee0771277d475c005847c8797107335fd4d22
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d04ee0771277d475c005847c8797107335fd4d22
Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Dec 4 01:39:35 2023 +0100 radeonsi: Add support to clear LDS at the end of a shader. No hash updates as I didn't find a facility to do it in radeonsi (even though there are flags like forcing fma32). Note that we do this very late to avoid any optimizations that might remove the dead stores. (Checked that LLVM doesn't remove them, but it is admittedly potentially brittle) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679> --- src/gallium/drivers/radeonsi/si_debug_options.h | 1 + src/gallium/drivers/radeonsi/si_shader.c | 11 +++++++++++ src/gallium/drivers/radeonsi/si_shader.h | 2 ++ src/gallium/drivers/radeonsi/si_shader_info.c | 3 ++- src/gallium/drivers/radeonsi/si_state_shaders.cpp | 2 ++ 5 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h index d69ca2c4ba9..270904361cf 100644 --- a/src/gallium/drivers/radeonsi/si_debug_options.h +++ b/src/gallium/drivers/radeonsi/si_debug_options.h @@ -20,6 +20,7 @@ OPT_INT(max_vram_map_size, 8196, "Maximum size of a buffer in VRAM to map direct OPT_BOOL(force_use_fma32, false, "Force use fma32 instruction for GPU family newer than gfx9") OPT_BOOL(dcc_msaa, false, "Enable DCC for MSAA") OPT_BOOL(zerovram, false, "Zero all VRAM allocations") +OPT_BOOL(clear_lds, false, "Clear LDS at the end of shaders. 
Might decrease performance.") #undef OPT_BOOL #undef OPT_INT diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1521b29d00a..b80f71601e7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2255,6 +2255,11 @@ static void si_nir_emit_polygon_stipple(nir_shader *nir, struct si_shader_args * nir_discard_if(b, nir_inot(b, pass)); } +bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *shader) +{ + return shader->info.stage == MESA_SHADER_COMPUTE && shader->info.shared_size > 0 && sscreen->options.clear_lds; +} + struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_args *args, bool *free_nir, @@ -2512,6 +2517,12 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, .allow_fp16 = sel->screen->info.gfx_level >= GFX9, }); + if (si_should_clear_lds(sel->screen, nir)) { + const unsigned chunk_size = 16; /* max single store size */ + const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size); + NIR_PASS_V(nir, nir_clear_shared_memory, shared_size, chunk_size); + } + NIR_PASS(progress, nir, ac_nir_lower_intrinsics_to_args, sel->screen->info.gfx_level, si_select_hw_stage(nir->info.stage, key, sel->screen->info.gfx_level), &args->ac); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index f429ad20699..6345884d5b6 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -1046,6 +1046,8 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector * struct gfx9_gs_info *out); bool gfx10_is_ngg_passthrough(struct si_shader *shader); +bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader *shader); + /* Inline helpers. */ /* Return the pointer to the main shader part's pointer. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 5f6007898db..23597e4e67a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -652,7 +652,8 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS); info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) || BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID); + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) || + si_should_clear_lds(sscreen, nir); info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_SIZE); info->uses_drawid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID); info->uses_primid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 6b726932236..ef0b4a501d4 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -167,6 +167,8 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es, shader_variant_flags |= 1 << 10; if (sel->screen->options.inline_uniforms) shader_variant_flags |= 1 << 11; + if (sel->screen->options.clear_lds) + shader_variant_flags |= 1 << 12; struct mesa_sha1 ctx; _mesa_sha1_init(&ctx);