Module: Mesa Branch: main Commit: ba6d389fa7a0ac512cb9d4cdd21efde990f041b1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ba6d389fa7a0ac512cb9d4cdd21efde990f041b1
Author: Marek Olšák <[email protected]> Date: Sat Nov 6 14:08:53 2021 -0400 radeonsi: don't use GS SGPR6 for the small prim cull info; use a user SGPR instead. This will be needed in the future. Also don't upload small_prim_precision because it's passed via VS_STATE_BITS. Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13811> --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 2 +- src/gallium/drivers/radeonsi/si_blit.c | 4 ++++ src/gallium/drivers/radeonsi/si_shader.c | 14 +++++++------- src/gallium/drivers/radeonsi/si_shader.h | 4 +++- src/gallium/drivers/radeonsi/si_state_draw.cpp | 2 +- src/gallium/drivers/radeonsi/si_state_viewport.c | 9 +++++---- 6 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 17a3747db29..6d3bc0bba3a 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -1319,7 +1319,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) ret = si_insert_input_ptr(ctx, ret, ctx->args.base_vertex, 8 + SI_SGPR_BASE_VERTEX); ret = si_insert_input_ptr(ctx, ret, ctx->args.draw_id, 8 + SI_SGPR_DRAWID); ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE); - ret = si_insert_input_ptr(ctx, ret, ctx->args.vertex_buffers, 8 + SI_VS_NUM_USER_SGPR); + ret = si_insert_input_ptr(ctx, ret, ctx->args.vertex_buffers, 8 + GFX9_GS_NUM_USER_SGPR); for (unsigned i = 0; i < shader->selector->num_vbos_in_user_sgprs; i++) { ret = si_insert_input_v4i32(ctx, ret, ctx->vb_descriptors[i], diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index c2c96d0dfbe..fd18d8ff4ac 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -99,6 +99,10 @@ void si_blitter_end(struct si_context *sctx) * 
non-global VS user SGPRs. */ sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX); + /* Reset SI_SGPR_SMALL_PRIM_CULL_INFO: */ + if (sctx->screen->use_ngg_culling) + si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); + unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen); sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL && sctx->num_vertex_elements > diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5fb4139e07c..51807271d32 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -524,10 +524,8 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.merged_wave_info); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.scratch_offset); - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, - &ctx->small_prim_cull_info); /* SPI_SHADER_PGM_LO_GS << 8 */ - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, - NULL); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */ + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */ + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */ declare_global_desc_pointers(ctx); if (ctx->stage != MESA_SHADER_VERTEX || !shader->selector->info.base.vs.blit_sgprs_amd) { @@ -543,19 +541,21 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.base_vertex); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.draw_id); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.start_instance); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->small_prim_cull_info); + declare_vb_descriptor_input_sgprs(ctx); } } else { + /* TES or GS */ 
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits); if (ctx->stage == MESA_SHADER_TESS_EVAL) { ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr); + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */ + ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->small_prim_cull_info); } } - if (ctx->stage == MESA_SHADER_VERTEX) - declare_vb_descriptor_input_sgprs(ctx); - /* VGPRs (first GS, then VS/TES) */ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[0]); ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[1]); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 887cdc1e7b5..d6e1720278a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -211,9 +211,11 @@ enum /* GS limits */ GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, - GFX9_GS_NUM_USER_SGPR = MAX2(SI_VS_NUM_USER_SGPR, SI_TES_NUM_USER_SGPR), SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS, + GFX9_SGPR_SMALL_PRIM_CULL_INFO = MAX2(SI_VS_NUM_USER_SGPR, SI_TES_NUM_USER_SGPR), + GFX9_GS_NUM_USER_SGPR, + /* PS only */ SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS, SI_PS_NUM_USER_SGPR, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 84a189be71e..2add72f5623 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -1862,7 +1862,7 @@ static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx, if (GFX_VERSION >= GFX9) { if (HAS_TESS) sh_dw_offset = GFX9_TCS_NUM_USER_SGPR; - else if (HAS_GS) + else if (HAS_GS || NGG) sh_dw_offset = GFX9_GS_NUM_USER_SGPR; } diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 
7db69b9df0d..8894732a7a0 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -105,6 +105,7 @@ static void si_emit_cull_state(struct si_context *sctx) { assert(sctx->screen->use_ngg_culling); + const unsigned upload_size = offsetof(struct si_small_prim_cull_info, small_prim_precision); struct si_small_prim_cull_info info; si_get_small_prim_cull_info(sctx, &info); @@ -112,8 +113,8 @@ static void si_emit_cull_state(struct si_context *sctx) memcmp(&info, &sctx->last_small_prim_cull_info, sizeof(info))) { unsigned offset = 0; - /* Align to 256, because the address is shifted by 8 bits. */ - u_upload_data(sctx->b.const_uploader, 0, sizeof(info), 256, &info, &offset, + u_upload_data(sctx->b.const_uploader, 0, upload_size, + si_optimal_tcc_alignment(sctx, upload_size), &info, &offset, (struct pipe_resource **)&sctx->small_prim_cull_info_buf); sctx->small_prim_cull_info_address = sctx->small_prim_cull_info_buf->gpu_address + offset; @@ -124,8 +125,8 @@ static void si_emit_cull_state(struct si_context *sctx) radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->small_prim_cull_info_buf, RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER); radeon_begin(&sctx->gfx_cs); - radeon_set_sh_reg(R_00B220_SPI_SHADER_PGM_LO_GS, - sctx->small_prim_cull_info_address >> 8); + radeon_set_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + GFX9_SGPR_SMALL_PRIM_CULL_INFO * 4, + sctx->small_prim_cull_info_address); radeon_end(); /* Set VS_STATE.SMALL_PRIM_PRECISION for NGG culling.
