Module: Mesa
Branch: main
Commit: d04ee0771277d475c005847c8797107335fd4d22
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d04ee0771277d475c005847c8797107335fd4d22

Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
Date:   Mon Dec  4 01:39:35 2023 +0100

radeonsi: Add support to clear LDS at the end of a shader.

No hash updates as I didn't find a facility to do it in radeonsi
(even though there are flags like forcing fma32).

Note that we do this very late to keep any optimization passes from
removing the dead stores. (I checked that LLVM doesn't remove them,
but this is admittedly potentially brittle.)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679>

---

 src/gallium/drivers/radeonsi/si_debug_options.h   |  1 +
 src/gallium/drivers/radeonsi/si_shader.c          | 11 +++++++++++
 src/gallium/drivers/radeonsi/si_shader.h          |  2 ++
 src/gallium/drivers/radeonsi/si_shader_info.c     |  3 ++-
 src/gallium/drivers/radeonsi/si_state_shaders.cpp |  2 ++
 5 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h
index d69ca2c4ba9..270904361cf 100644
--- a/src/gallium/drivers/radeonsi/si_debug_options.h
+++ b/src/gallium/drivers/radeonsi/si_debug_options.h
@@ -20,6 +20,7 @@ OPT_INT(max_vram_map_size, 8196, "Maximum size of a buffer in 
VRAM to map direct
 OPT_BOOL(force_use_fma32, false, "Force use fma32 instruction for GPU family 
newer than gfx9")
 OPT_BOOL(dcc_msaa, false, "Enable DCC for MSAA")
 OPT_BOOL(zerovram, false, "Zero all VRAM allocations")
+OPT_BOOL(clear_lds, false, "Clear LDS at the end of shaders. Might decrease 
performance.")
 
 #undef OPT_BOOL
 #undef OPT_INT
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 1521b29d00a..b80f71601e7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2255,6 +2255,11 @@ static void si_nir_emit_polygon_stipple(nir_shader *nir, 
struct si_shader_args *
    nir_discard_if(b, nir_inot(b, pass));
 }
 
+bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader 
*shader)
+{
+   return shader->info.stage == MESA_SHADER_COMPUTE && 
shader->info.shared_size > 0 && sscreen->options.clear_lds;
+}
+
 struct nir_shader *si_get_nir_shader(struct si_shader *shader,
                                      struct si_shader_args *args,
                                      bool *free_nir,
@@ -2512,6 +2517,12 @@ struct nir_shader *si_get_nir_shader(struct si_shader 
*shader,
                .allow_fp16 = sel->screen->info.gfx_level >= GFX9,
             });
 
+   if (si_should_clear_lds(sel->screen, nir)) {
+      const unsigned chunk_size = 16; /* max single store size */
+      const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+      NIR_PASS_V(nir, nir_clear_shared_memory, shared_size, chunk_size);
+   }
+
    NIR_PASS(progress, nir, ac_nir_lower_intrinsics_to_args, 
sel->screen->info.gfx_level,
             si_select_hw_stage(nir->info.stage, key, 
sel->screen->info.gfx_level),
             &args->ac);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index f429ad20699..6345884d5b6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -1046,6 +1046,8 @@ void gfx9_get_gs_info(struct si_shader_selector *es, 
struct si_shader_selector *
                       struct gfx9_gs_info *out);
 bool gfx10_is_ngg_passthrough(struct si_shader *shader);
 
+bool si_should_clear_lds(struct si_screen *sscreen, const struct nir_shader 
*shader);
+
 /* Inline helpers. */
 
 /* Return the pointer to the main shader part's pointer. */
diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c
index 5f6007898db..23597e4e67a 100644
--- a/src/gallium/drivers/radeonsi/si_shader_info.c
+++ b/src/gallium/drivers/radeonsi/si_shader_info.c
@@ -652,7 +652,8 @@ void si_nir_scan_shader(struct si_screen *sscreen, const 
struct nir_shader *nir,
    info->uses_grid_size = BITSET_TEST(nir->info.system_values_read, 
SYSTEM_VALUE_NUM_WORKGROUPS);
    info->uses_tg_size = BITSET_TEST(nir->info.system_values_read, 
SYSTEM_VALUE_NUM_SUBGROUPS) ||
                         BITSET_TEST(nir->info.system_values_read, 
SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) ||
-                        BITSET_TEST(nir->info.system_values_read, 
SYSTEM_VALUE_SUBGROUP_ID);
+                        BITSET_TEST(nir->info.system_values_read, 
SYSTEM_VALUE_SUBGROUP_ID) ||
+                        si_should_clear_lds(sscreen, nir);
    info->uses_variable_block_size = BITSET_TEST(nir->info.system_values_read, 
SYSTEM_VALUE_WORKGROUP_SIZE);
    info->uses_drawid = BITSET_TEST(nir->info.system_values_read, 
SYSTEM_VALUE_DRAW_ID);
    info->uses_primid = BITSET_TEST(nir->info.system_values_read, 
SYSTEM_VALUE_PRIMITIVE_ID) ||
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 6b726932236..ef0b4a501d4 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -167,6 +167,8 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, 
bool ngg, bool es,
       shader_variant_flags |= 1 << 10;
    if (sel->screen->options.inline_uniforms)
       shader_variant_flags |= 1 << 11;
+   if (sel->screen->options.clear_lds)
+      shader_variant_flags |= 1 << 12;
 
    struct mesa_sha1 ctx;
    _mesa_sha1_init(&ctx);

Reply via email to