Module: Mesa
Branch: main
Commit: b74d849a29df8b9de892d5ceb8094300076a669d
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b74d849a29df8b9de892d5ceb8094300076a669d
Author: Marek Olšák <marek.ol...@amd.com> Date: Mon Oct 23 21:58:57 2023 -0400 ac/gpu_info: split has_set_pairs_packets into context and sh flags Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26095> --- src/amd/common/ac_gpu_info.c | 16 +++++++--------- src/amd/common/ac_gpu_info.h | 4 ++-- src/gallium/drivers/radeonsi/si_compute.c | 12 ++++++------ src/gallium/drivers/radeonsi/si_descriptors.c | 8 ++++---- src/gallium/drivers/radeonsi/si_pipe.c | 8 ++------ src/gallium/drivers/radeonsi/si_pm4.c | 2 +- src/gallium/drivers/radeonsi/si_state_draw.cpp | 2 +- src/gallium/drivers/radeonsi/si_state_shaders.cpp | 6 +++--- src/gallium/drivers/radeonsi/si_state_viewport.c | 2 +- 9 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 54fdb097e3c..bf03a3abe80 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1262,14 +1262,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->has_export_conflict_bug = info->gfx_level == GFX11; - /* Only dGPUs have SET_*_PAIRS packets for now. - * Register shadowing is only required by SET_SH_REG_PAIRS*, but we require it - * for SET_CONTEXT_REG_PAIRS* as well for simplicity. - */ - info->has_set_pairs_packets = info->gfx_level >= GFX11 && - info->register_shadowing_required && - info->has_dedicated_vram; - /* GFX6-8 SDMA can't ignore page faults on unmapped sparse resources. 
*/ info->sdma_supports_sparse = info->gfx_level >= GFX9; @@ -1584,6 +1576,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->fw_based_mcbp.csa_alignment = device_info.csa_alignment; } + if (info->gfx_level >= GFX11 && info->has_dedicated_vram) { + info->has_set_context_pairs_packed = true; + info->has_set_sh_pairs_packed = info->register_shadowing_required; + } + set_custom_cu_en_mask(info); const char *ib_filename = debug_get_option("AMD_PARSE_IB", NULL); @@ -1746,7 +1743,8 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f) fprintf(f, " never_send_perfcounter_stop = %i\n", info->never_send_perfcounter_stop); fprintf(f, " discardable_allows_big_page = %i\n", info->discardable_allows_big_page); fprintf(f, " has_taskmesh_indirect0_bug = %i\n", info->has_taskmesh_indirect0_bug); - fprintf(f, " has_set_pairs_packets = %i\n", info->has_set_pairs_packets); + fprintf(f, " has_set_context_pairs_packed = %i\n", info->has_set_context_pairs_packed); + fprintf(f, " has_set_sh_pairs_packed = %i\n", info->has_set_sh_pairs_packed); fprintf(f, " conformant_trunc_coord = %i\n", info->conformant_trunc_coord); fprintf(f, "Display features:\n"); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index c9d66f7aaba..e2550c1428c 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -112,10 +112,10 @@ struct radeon_info { bool has_export_conflict_bug; bool has_vrs_ds_export_bug; bool has_taskmesh_indirect0_bug; - bool has_set_pairs_packets; bool sdma_supports_sparse; /* Whether SDMA can safely access sparse resources. */ bool sdma_supports_compression; /* Whether SDMA supports DCC and HTILE. */ - + bool has_set_context_pairs_packed; + bool has_set_sh_pairs_packed; /* conformant_trunc_coord is equal to TA_CNTL2.TRUNCATE_COORD_MODE, which exists since gfx11. 
* diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index d12b45c86fe..bf534ce75a6 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -499,7 +499,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->bo, RADEON_USAGE_READ | RADEON_PRIO_SHADER_BINARY); - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_push_compute_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8); radeon_opt_push_compute_sh_reg(R_00B848_COMPUTE_PGM_RSRC1, SI_TRACKED_COMPUTE_PGM_RSRC1, config->rsrc1); @@ -740,7 +740,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr } radeon_begin_again(cs); } else { - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_push_compute_sh_reg(grid_size_reg, info->grid[0]); radeon_push_compute_sh_reg(grid_size_reg + 4, info->grid[1]); radeon_push_compute_sh_reg(grid_size_reg + 8, info->grid[2]); @@ -756,7 +756,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr if (sel->info.uses_variable_block_size) { uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20); - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_push_compute_sh_reg(block_size_reg, value); } else { radeon_set_sh_reg(block_size_reg, value); @@ -766,7 +766,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr if (sel->info.base.cs.user_data_components_amd) { unsigned num = sel->info.base.cs.user_data_components_amd; - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { for (unsigned i = 0; i < num; i++) radeon_push_compute_sh_reg(cs_user_data_reg + i * 4, sctx->cs_user_data[i]); } else 
{ @@ -802,7 +802,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ sctx->cs_max_waves_per_sh, threadgroups_per_cu); - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_opt_push_compute_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS, SI_TRACKED_COMPUTE_RESOURCE_LIMITS, compute_resource_limits); @@ -844,7 +844,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); } - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_opt_push_compute_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X, SI_TRACKED_COMPUTE_NUM_THREAD_X, num_threads[0]); radeon_opt_push_compute_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index a47bc4ecafe..a00649fad20 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2187,7 +2187,7 @@ void si_shader_change_notify(struct si_context *sctx) if (sh_reg_base) { \ unsigned mask = shader_pointers_dirty & (pointer_mask); \ \ - if (sctx->screen->info.has_set_pairs_packets) { \ + if (sctx->screen->info.has_set_sh_pairs_packed) { \ u_foreach_bit(i, mask) { \ struct si_descriptors *descs = &sctx->descriptors[i]; \ unsigned sh_reg = sh_reg_base + descs->shader_userdata_offset; \ @@ -2214,7 +2214,7 @@ static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_de { radeon_begin(&sctx->gfx_cs); - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + descs->shader_userdata_offset, descs->gpu_address); radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + descs->shader_userdata_offset, @@ -2292,7 +2292,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx, 
unsigned index) sh_base[PIPE_SHADER_GEOMETRY], gfx); if (sctx->gs_attribute_ring_pointer_dirty) { - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + GFX9_SGPR_ATTRIBUTE_RING_ADDR * 4, sctx->screen->attribute_ring->gpu_address); @@ -2340,7 +2340,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx) sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE); if (sctx->compute_bindless_pointer_dirty) { - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_push_compute_sh_reg(base + sctx->bindless_descriptors.shader_userdata_offset, sctx->bindless_descriptors.gpu_address); } else { diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 22f231de1cc..da59e238884 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1202,13 +1202,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->info.use_display_dcc_with_retile_blit = false; } - if (sscreen->debug_flags & DBG(SHADOW_REGS)) { + /* Using the environment variable doesn't enable PAIRS packets for simplicity. */ + if (sscreen->debug_flags & DBG(SHADOW_REGS)) sscreen->info.register_shadowing_required = true; - /* Recompute has_set_pairs_packets. 
*/ - sscreen->info.has_set_pairs_packets = sscreen->info.gfx_level >= GFX11 && - sscreen->info.register_shadowing_required && - sscreen->info.has_dedicated_vram; - } #ifdef LLVM_AVAILABLE sscreen->use_aco = (sscreen->debug_flags & DBG(USE_ACO)); diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 088d9e7a717..6eb6b777ffc 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -35,7 +35,7 @@ static unsigned pairs_packed_opcode_to_regular(unsigned opcode) static unsigned regular_opcode_to_pairs(struct si_pm4_state *state, unsigned opcode) { - if (state->screen->info.has_set_pairs_packets) { + if (state->screen->info.has_set_sh_pairs_packed) { switch (opcode) { case PKT3_SET_CONTEXT_REG: return PKT3_SET_CONTEXT_REG_PAIRS_PACKED; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index adf04519298..ca8eb3b007c 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -2407,7 +2407,7 @@ static void si_init_draw_vbo(struct si_context *sctx) if (!NGG && GFX_VERSION >= GFX11) return; - if (GFX_VERSION >= GFX11 && sctx->screen->info.has_set_pairs_packets) { + if (GFX_VERSION >= GFX11 && sctx->screen->info.has_set_sh_pairs_packed) { sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] = si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_PAIRS_ON>; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 7e023fa3524..8b3686a5694 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1195,7 +1195,7 @@ static void gfx10_emit_shader_ngg(struct si_context *sctx, unsigned index) radeon_begin_again(&sctx->gfx_cs); radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC, shader->ngg.ge_pc_alloc); - if (sctx->screen->info.has_set_pairs_packets) { 
+ if (sctx->screen->info.has_set_sh_pairs_packed) { assert(!sctx->screen->info.uses_kernel_cu_mask); radeon_opt_push_gfx_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS, @@ -4523,7 +4523,7 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx, unsigned index if (!sctx->shader.tes.cso || !sctx->shader.tcs.current) return; - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_opt_push_gfx_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS, SI_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2); @@ -4572,7 +4572,7 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx, unsigned index * tessellation is disabled. That's because those user SGPRs are only set in LS * for tessellation. */ - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4, SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX, sctx->tcs_offchip_layout); diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index efd2f467c18..2a392dbd597 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -91,7 +91,7 @@ static void si_emit_cull_state(struct si_context *sctx, unsigned index) radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->small_prim_cull_info_buf, RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER); - if (sctx->screen->info.has_set_pairs_packets) { + if (sctx->screen->info.has_set_sh_pairs_packed) { radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + GFX9_SGPR_SMALL_PRIM_CULL_INFO * 4, sctx->small_prim_cull_info_address);