Module: Mesa Branch: main Commit: e3e47aa96e5ab938be2c89ce0b5481928cc68f2e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e3e47aa96e5ab938be2c89ce0b5481928cc68f2e
Author: Ganesh Belgur Ramachandra <[email protected]> Date: Thu Sep 14 04:55:37 2023 -0500 radeonsi: "clear_12bytes_buffer" shader in nir Reviewed-by: Marek Olšák <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25221> --- src/gallium/drivers/radeonsi/si_compute_blit.c | 4 +-- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_shaderlib_nir.c | 21 ++++++++++++++ src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c | 35 ------------------------ 4 files changed, 23 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index a25c0a1d19c..06e2fc16c46 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -317,8 +317,6 @@ static void si_compute_clear_12bytes_buffer(struct si_context *sctx, struct pipe const uint32_t *clear_value, unsigned flags, enum si_coherency coher) { - struct pipe_context *ctx = &sctx->b; - assert(dst_offset % 4 == 0); assert(size % 4 == 0); unsigned size_12 = DIV_ROUND_UP(size, 12); @@ -333,7 +331,7 @@ static void si_compute_clear_12bytes_buffer(struct si_context *sctx, struct pipe struct pipe_grid_info info = {0}; if (!sctx->cs_clear_12bytes_buffer) - sctx->cs_clear_12bytes_buffer = si_clear_12bytes_buffer_shader(ctx); + sctx->cs_clear_12bytes_buffer = si_clear_12bytes_buffer_shader(sctx); info.block[0] = 64; info.last_block[0] = size_12 % 64; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 4c94aa67982..af042af269a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1642,7 +1642,7 @@ void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords bool dst_stream_cache_policy, bool is_copy); void *si_create_clear_buffer_rmw_cs(struct si_context *sctx); void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_target type); -void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx); +void *si_clear_12bytes_buffer_shader(struct si_context *sctx); void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned num_samples, bool is_array); void *si_create_query_result_cs(struct si_context *sctx); void *gfx11_create_sh_query_result_cs(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index 555d98b61c0..c075e6bfa71 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -611,3 +611,24 @@ void *si_clear_render_target_shader(struct si_context *sctx, enum pipe_texture_t return create_shader_state(sctx, b.shader); } + +void *si_clear_12bytes_buffer_shader(struct si_context *sctx) +{ + const nir_shader_compiler_options *options = + sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE); + + nir_builder b = + nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "clear_12bytes_buffer"); + b.shader->info.workgroup_size[0] = 64; + b.shader->info.workgroup_size[1] = 1; + b.shader->info.workgroup_size[2] = 1; + b.shader->info.cs.user_data_components_amd = 3; + + nir_def *offset = nir_imul_imm(&b, get_global_ids(&b, 1), 12); + nir_def *value = nir_trim_vector(&b, nir_load_user_data_amd(&b), 3); + + nir_store_ssbo(&b, value, nir_imm_int(&b, 0), offset, + .access = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ACCESS_NON_TEMPORAL : 0); + + return create_shader_state(sctx, b.shader); +} diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c index 1468bdc508c..3b13d4188b8 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c @@ -388,41 +388,6 @@ void *si_create_query_result_cs(struct si_context *sctx) return sctx->b.create_compute_state(&sctx->b, &state); } -void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx) -{ - static const char text[] = "COMP\n" - "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n" - "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n" - "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" - "PROPERTY CS_USER_DATA_COMPONENTS_AMD 3\n" - "DCL SV[0], THREAD_ID\n" - "DCL SV[1], BLOCK_ID\n" - "DCL SV[2], CS_USER_DATA_AMD\n" - "DCL BUFFER[0]\n" - "DCL TEMP[0..0]\n" - "IMM[0] UINT32 {64, 1, 12, 0}\n" - "UMAD TEMP[0].x, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n" - "UMUL TEMP[0].x, TEMP[0].xyzz, IMM[0].zzzz\n" // 12 bytes - "STORE BUFFER[0].xyz, TEMP[0].xxxx, SV[2].xyzz%s\n" - "END\n"; - char final_text[2048]; - struct tgsi_token tokens[1024]; - struct pipe_compute_state state = {0}; - - snprintf(final_text, sizeof(final_text), text, - SI_COMPUTE_DST_CACHE_POLICY != L2_LRU ? ", STREAM_CACHE_POLICY" : ""); - - if (!tgsi_text_translate(final_text, tokens, ARRAY_SIZE(tokens))) { - assert(false); - return NULL; - } - - state.ir_type = PIPE_SHADER_IR_TGSI; - state.prog = tokens; - - return ctx->create_compute_state(ctx, &state); -} - /* Load samples from the image, and copy them to the same image. This looks like * a no-op, but it's not. Loads use FMASK, while stores don't, so samples are * reordered to match expanded FMASK.
