Module: Mesa
Branch: main
Commit: eaf61adea56a9242a160afa4f68827e6568b4e80
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eaf61adea56a9242a160afa4f68827e6568b4e80
Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Wed Nov 1 15:34:13 2023 +0100 radv: Add option to clear LDS at the end of a shader. Only shaders which explicitly allow shared memory are included for now. The pass is very late to avoid optimizations removing the stores and to ensure the clear gets added after MS outputs get loaded from LDS. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679> --- src/amd/vulkan/radv_instance.c | 3 +++ src/amd/vulkan/radv_pipeline.c | 17 +++++++++++++++++ src/amd/vulkan/radv_private.h | 1 + src/amd/vulkan/radv_shader.h | 2 ++ src/amd/vulkan/radv_shader_info.c | 3 ++- src/util/driconf.h | 3 +++ 6 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index cf80d2ab9c6..762b4b3383d 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -129,6 +129,7 @@ static const driOptionDescription radv_dri_options[] = { DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false) DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false) DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0) + DRI_CONF_RADV_CLEAR_LDS(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG @@ -193,6 +194,8 @@ radv_init_dri_options(struct radv_instance *instance) if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc")) instance->debug_flags |= RADV_DEBUG_NO_DCC; + instance->clear_lds = driQueryOptionb(&instance->dri_options, "radv_clear_lds"); + instance->zero_vram = driQueryOptionb(&instance->dri_options, "radv_zero_vram"); instance->disable_aniso_single_level = driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level"); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 38048d9cfc8..a807cbdbab6 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -229,6 +229,7 @@ radv_generate_pipeline_key(const struct radv_device *device, const VkPipelineSha #define RADV_HASH_SHADER_PS_WAVE32 (1 << 2) 
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) #define RADV_HASH_SHADER_LLVM (1 << 4) +#define RADV_HASH_SHADER_CLEAR_LDS (1 << 5) #define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8) #define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13) #define RADV_HASH_SHADER_EMULATE_RT (1 << 16) @@ -267,6 +268,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats) hash_flags |= RADV_HASH_SHADER_NO_RT; if (device->instance->dual_color_blend_by_location) hash_flags |= RADV_HASH_SHADER_DUAL_BLEND_MRT1; + if (device->instance->clear_lds) + hash_flags |= RADV_HASH_SHADER_CLEAR_LDS; return hash_flags; } @@ -701,6 +704,12 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key NIR_PASS_V(stage->nir, ac_nir_lower_ps, &options); } + if (radv_shader_should_clear_lds(device, stage->nir)) { + const unsigned chunk_size = 16; /* max single store size */ + const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size); + NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size); + } + NIR_PASS(_, stage->nir, nir_lower_int64); NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8); @@ -782,6 +791,14 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key } } +bool +radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader) +{ + return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH || + shader->info.stage == MESA_SHADER_TASK) && + shader->info.shared_size > 0 && device->instance->clear_lds; +} + static uint32_t radv_get_executable_count(struct radv_pipeline *pipeline) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 09e56f05725..3f0da53d273 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -382,6 +382,7 @@ struct radv_instance { bool force_rt_wave64; bool dual_color_blend_by_location; bool legacy_sparse_binding; + bool clear_lds; char *app_layer; uint8_t 
override_graphics_shader_version; uint8_t override_compute_shader_version; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 1e57200955b..bab97c1e7a4 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -723,6 +723,8 @@ void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets); void radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key *pipeline_key, struct radv_shader_stage *stage); +bool radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader); + nir_shader *radv_parse_rt_stage(struct radv_device *device, const VkPipelineShaderStageCreateInfo *sinfo, const struct radv_pipeline_key *key, const struct radv_pipeline_layout *pipeline_layout); diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 42ea2f2d6ad..9c74669ecc0 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -1179,7 +1179,8 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n (nir->info.stage == MESA_SHADER_MESH && device->physical_device->rad_info.gfx_level < GFX11); info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) | BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) | - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS); + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) | + radv_shader_should_clear_lds(device, nir); if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK || nir->info.stage == MESA_SHADER_MESH) { diff --git a/src/util/driconf.h b/src/util/driconf.h index 4c06f9cc5e2..e91bf81a381 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -713,6 +713,9 @@ #define DRI_CONF_RADV_APP_LAYER() DRI_CONF_OPT_S_NODEF(radv_app_layer, "Select an application layer.") +#define DRI_CONF_RADV_CLEAR_LDS(def) \ + 
DRI_CONF_OPT_B(radv_clear_lds, def, "Clear LDS at the end of shaders. Might decrease performance.") + /** * \brief ANV specific configuration options */