Module: Mesa
Branch: main
Commit: b74d849a29df8b9de892d5ceb8094300076a669d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b74d849a29df8b9de892d5ceb8094300076a669d

Author: Marek Olšák <marek.ol...@amd.com>
Date:   Mon Oct 23 21:58:57 2023 -0400

ac/gpu_info: split has_set_pairs_packets into context and sh flags

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-pra...@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26095>

---

 src/amd/common/ac_gpu_info.c                      | 16 +++++++---------
 src/amd/common/ac_gpu_info.h                      |  4 ++--
 src/gallium/drivers/radeonsi/si_compute.c         | 12 ++++++------
 src/gallium/drivers/radeonsi/si_descriptors.c     |  8 ++++----
 src/gallium/drivers/radeonsi/si_pipe.c            |  8 ++------
 src/gallium/drivers/radeonsi/si_pm4.c             |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.cpp    |  2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.cpp |  6 +++---
 src/gallium/drivers/radeonsi/si_state_viewport.c  |  2 +-
 9 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 54fdb097e3c..bf03a3abe80 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -1262,14 +1262,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
 
    info->has_export_conflict_bug = info->gfx_level == GFX11;
 
-   /* Only dGPUs have SET_*_PAIRS packets for now.
-    * Register shadowing is only required by SET_SH_REG_PAIRS*, but we require it
-    * for SET_CONTEXT_REG_PAIRS* as well for simplicity.
-    */
-   info->has_set_pairs_packets = info->gfx_level >= GFX11 &&
-                                 info->register_shadowing_required &&
-                                 info->has_dedicated_vram;
-
    /* GFX6-8 SDMA can't ignore page faults on unmapped sparse resources. */
    info->sdma_supports_sparse = info->gfx_level >= GFX9;
 
@@ -1584,6 +1576,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
       info->fw_based_mcbp.csa_alignment = device_info.csa_alignment;
    }
 
+   if (info->gfx_level >= GFX11 && info->has_dedicated_vram) {
+      info->has_set_context_pairs_packed = true;
+      info->has_set_sh_pairs_packed = info->register_shadowing_required;
+   }
+
    set_custom_cu_en_mask(info);
 
    const char *ib_filename = debug_get_option("AMD_PARSE_IB", NULL);
@@ -1746,7 +1743,8 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f)
    fprintf(f, "    never_send_perfcounter_stop = %i\n", info->never_send_perfcounter_stop);
    fprintf(f, "    discardable_allows_big_page = %i\n", info->discardable_allows_big_page);
    fprintf(f, "    has_taskmesh_indirect0_bug = %i\n", info->has_taskmesh_indirect0_bug);
-   fprintf(f, "    has_set_pairs_packets = %i\n", info->has_set_pairs_packets);
+   fprintf(f, "    has_set_context_pairs_packed = %i\n", info->has_set_context_pairs_packed);
+   fprintf(f, "    has_set_sh_pairs_packed = %i\n", info->has_set_sh_pairs_packed);
    fprintf(f, "    conformant_trunc_coord = %i\n", info->conformant_trunc_coord);
 
    fprintf(f, "Display features:\n");
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index c9d66f7aaba..e2550c1428c 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -112,10 +112,10 @@ struct radeon_info {
    bool has_export_conflict_bug;
    bool has_vrs_ds_export_bug;
    bool has_taskmesh_indirect0_bug;
-   bool has_set_pairs_packets;
    bool sdma_supports_sparse;      /* Whether SDMA can safely access sparse resources. */
    bool sdma_supports_compression; /* Whether SDMA supports DCC and HTILE. */
-
+   bool has_set_context_pairs_packed;
+   bool has_set_sh_pairs_packed;
 
    /* conformant_trunc_coord is equal to TA_CNTL2.TRUNCATE_COORD_MODE, which exists since gfx11.
     *
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index d12b45c86fe..bf534ce75a6 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -499,7 +499,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute
    radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, shader->bo,
                              RADEON_USAGE_READ | RADEON_PRIO_SHADER_BINARY);
 
-   if (sctx->screen->info.has_set_pairs_packets) {
+   if (sctx->screen->info.has_set_sh_pairs_packed) {
       radeon_push_compute_sh_reg(R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
       radeon_opt_push_compute_sh_reg(R_00B848_COMPUTE_PGM_RSRC1,
                                      SI_TRACKED_COMPUTE_PGM_RSRC1, config->rsrc1);
@@ -740,7 +740,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
          }
          radeon_begin_again(cs);
       } else {
-         if (sctx->screen->info.has_set_pairs_packets) {
+         if (sctx->screen->info.has_set_sh_pairs_packed) {
             radeon_push_compute_sh_reg(grid_size_reg, info->grid[0]);
             radeon_push_compute_sh_reg(grid_size_reg + 4, info->grid[1]);
             radeon_push_compute_sh_reg(grid_size_reg + 8, info->grid[2]);
@@ -756,7 +756,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
    if (sel->info.uses_variable_block_size) {
       uint32_t value = info->block[0] | (info->block[1] << 10) | (info->block[2] << 20);
 
-      if (sctx->screen->info.has_set_pairs_packets) {
+      if (sctx->screen->info.has_set_sh_pairs_packed) {
          radeon_push_compute_sh_reg(block_size_reg, value);
       } else {
          radeon_set_sh_reg(block_size_reg, value);
@@ -766,7 +766,7 @@ static void si_setup_nir_user_data(struct si_context *sctx, const struct pipe_gr
    if (sel->info.base.cs.user_data_components_amd) {
       unsigned num = sel->info.base.cs.user_data_components_amd;
 
-      if (sctx->screen->info.has_set_pairs_packets) {
+      if (sctx->screen->info.has_set_sh_pairs_packed) {
          for (unsigned i = 0; i < num; i++)
             radeon_push_compute_sh_reg(cs_user_data_reg + i * 4, sctx->cs_user_data[i]);
       } else {
@@ -802,7 +802,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
                                      sctx->cs_max_waves_per_sh,
                                      threadgroups_per_cu);
 
-   if (sctx->screen->info.has_set_pairs_packets) {
+   if (sctx->screen->info.has_set_sh_pairs_packed) {
       radeon_opt_push_compute_sh_reg(R_00B854_COMPUTE_RESOURCE_LIMITS,
                                      SI_TRACKED_COMPUTE_RESOURCE_LIMITS,
                                      compute_resource_limits);
@@ -844,7 +844,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_
       dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
    }
 
-   if (sctx->screen->info.has_set_pairs_packets) {
+   if (sctx->screen->info.has_set_sh_pairs_packed) {
       radeon_opt_push_compute_sh_reg(R_00B81C_COMPUTE_NUM_THREAD_X,
                                      SI_TRACKED_COMPUTE_NUM_THREAD_X, num_threads[0]);
       radeon_opt_push_compute_sh_reg(R_00B820_COMPUTE_NUM_THREAD_Y,
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a47bc4ecafe..a00649fad20 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2187,7 +2187,7 @@ void si_shader_change_notify(struct si_context *sctx)
    if (sh_reg_base) { \
       unsigned mask = shader_pointers_dirty & (pointer_mask); \
       \
-      if (sctx->screen->info.has_set_pairs_packets) { \
+      if (sctx->screen->info.has_set_sh_pairs_packed) { \
          u_foreach_bit(i, mask) { \
             struct si_descriptors *descs = &sctx->descriptors[i]; \
             unsigned sh_reg = sh_reg_base + descs->shader_userdata_offset; \
@@ -2214,7 +2214,7 @@ static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_de
 {
    radeon_begin(&sctx->gfx_cs);
 
-   if (sctx->screen->info.has_set_pairs_packets) {
+   if (sctx->screen->info.has_set_sh_pairs_packed) {
       radeon_push_gfx_sh_reg(R_00B030_SPI_SHADER_USER_DATA_PS_0 + descs->shader_userdata_offset,
                              descs->gpu_address);
       radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + descs->shader_userdata_offset,
@@ -2292,7 +2292,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx, unsigned index)
                                        sh_base[PIPE_SHADER_GEOMETRY], gfx);
 
    if (sctx->gs_attribute_ring_pointer_dirty) {
-      if (sctx->screen->info.has_set_pairs_packets) {
+      if (sctx->screen->info.has_set_sh_pairs_packed) {
          radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 +
                                 GFX9_SGPR_ATTRIBUTE_RING_ADDR * 4,
                                 sctx->screen->attribute_ring->gpu_address);
@@ -2340,7 +2340,7 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
    sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
 
    if (sctx->compute_bindless_pointer_dirty) {
-      if (sctx->screen->info.has_set_pairs_packets) {
+      if (sctx->screen->info.has_set_sh_pairs_packed) {
          radeon_push_compute_sh_reg(base + sctx->bindless_descriptors.shader_userdata_offset,
                                     sctx->bindless_descriptors.gpu_address);
       } else {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 22f231de1cc..da59e238884 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1202,13 +1202,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
       sscreen->info.use_display_dcc_with_retile_blit = false;
    }
 
-   if (sscreen->debug_flags & DBG(SHADOW_REGS)) {
+   /* Using the environment variable doesn't enable PAIRS packets for simplicity. */
+   if (sscreen->debug_flags & DBG(SHADOW_REGS))
       sscreen->info.register_shadowing_required = true;
-      /* Recompute has_set_pairs_packets. */
-      sscreen->info.has_set_pairs_packets = sscreen->info.gfx_level >= GFX11 &&
-                                            sscreen->info.register_shadowing_required &&
-                                            sscreen->info.has_dedicated_vram;
-   }
 
 #ifdef LLVM_AVAILABLE
    sscreen->use_aco = (sscreen->debug_flags & DBG(USE_ACO));
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 088d9e7a717..6eb6b777ffc 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -35,7 +35,7 @@ static unsigned pairs_packed_opcode_to_regular(unsigned opcode)
 
 static unsigned regular_opcode_to_pairs(struct si_pm4_state *state, unsigned opcode)
 {
-   if (state->screen->info.has_set_pairs_packets) {
+   if (state->screen->info.has_set_sh_pairs_packed) {
       switch (opcode) {
       case PKT3_SET_CONTEXT_REG:
          return PKT3_SET_CONTEXT_REG_PAIRS_PACKED;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index adf04519298..ca8eb3b007c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -2407,7 +2407,7 @@ static void si_init_draw_vbo(struct si_context *sctx)
    if (!NGG && GFX_VERSION >= GFX11)
       return;
 
-   if (GFX_VERSION >= GFX11 && sctx->screen->info.has_set_pairs_packets) {
+   if (GFX_VERSION >= GFX11 && sctx->screen->info.has_set_sh_pairs_packed) {
       sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
          si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, HAS_PAIRS_ON>;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 7e023fa3524..8b3686a5694 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -1195,7 +1195,7 @@ static void gfx10_emit_shader_ngg(struct si_context *sctx, unsigned index)
    radeon_begin_again(&sctx->gfx_cs);
    radeon_opt_set_uconfig_reg(sctx, R_030980_GE_PC_ALLOC, SI_TRACKED_GE_PC_ALLOC,
                               shader->ngg.ge_pc_alloc);
-   if (sctx->screen->info.has_set_pairs_packets) {
+   if (sctx->screen->info.has_set_sh_pairs_packed) {
       assert(!sctx->screen->info.uses_kernel_cu_mask);
       radeon_opt_push_gfx_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
                                  SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
@@ -4523,7 +4523,7 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx, unsigned index
    if (!sctx->shader.tes.cso || !sctx->shader.tcs.current)
       return;
 
-   if (sctx->screen->info.has_set_pairs_packets) {
+   if (sctx->screen->info.has_set_sh_pairs_packed) {
       radeon_opt_push_gfx_sh_reg(R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
                                  SI_TRACKED_SPI_SHADER_PGM_RSRC2_HS, sctx->ls_hs_rsrc2);
 
@@ -4572,7 +4572,7 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx, unsigned index
     * tessellation is disabled. That's because those user SGPRs are only set in LS
     * for tessellation.
     */
-   if (sctx->screen->info.has_set_pairs_packets) {
+   if (sctx->screen->info.has_set_sh_pairs_packed) {
       radeon_opt_push_gfx_sh_reg(tes_sh_base + SI_SGPR_TES_OFFCHIP_LAYOUT * 4,
                                  SI_TRACKED_SPI_SHADER_USER_DATA_ES__BASE_VERTEX,
                                  sctx->tcs_offchip_layout);
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index efd2f467c18..2a392dbd597 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -91,7 +91,7 @@ static void si_emit_cull_state(struct si_context *sctx, unsigned index)
    radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->small_prim_cull_info_buf,
                              RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER);
 
-   if (sctx->screen->info.has_set_pairs_packets) {
+   if (sctx->screen->info.has_set_sh_pairs_packed) {
       radeon_push_gfx_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 +
                              GFX9_SGPR_SMALL_PRIM_CULL_INFO * 4,
                              sctx->small_prim_cull_info_address);

Reply via email to