Module: Mesa
Branch: main
Commit: 65b40d0b7e9edd85aefd3ae17c73ac7f84d0330f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=65b40d0b7e9edd85aefd3ae17c73ac7f84d0330f

Author: Pierre-Eric Pelloux-Prayer <[email protected]>
Date:   Fri Mar 17 14:44:42 2023 +0100

radeonsi: implement fw based mcbp

Some chips support firmware based mcbp. If supported this means
radeonsi needs to allocate 3 buffers and pass them to the firmware.

From there, the firmware will handle mcbp and register shadowing
on its own so we don't need to insert LOAD packet in the preamble.

Reviewed-by: Marek Olšák <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986>

---

 src/amd/common/ac_shadowed_regs.c                  |  6 ++-
 src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c | 52 ++++++++++++++++------
 src/gallium/drivers/radeonsi/si_descriptors.c      |  2 +-
 src/gallium/drivers/radeonsi/si_gfx_cs.c           | 10 +++--
 src/gallium/drivers/radeonsi/si_pipe.c             |  3 +-
 src/gallium/drivers/radeonsi/si_pipe.h             |  9 +++-
 src/gallium/drivers/radeonsi/si_state_draw.cpp     |  4 +-
 src/gallium/drivers/radeonsi/si_state_shaders.cpp  |  6 +--
 src/gallium/include/winsys/radeon_winsys.h         |  6 +++
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c          | 25 ++++++++++-
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h          |  2 +
 11 files changed, 97 insertions(+), 28 deletions(-)

diff --git a/src/amd/common/ac_shadowed_regs.c 
b/src/amd/common/ac_shadowed_regs.c
index f060d55cb64..834bda34bc5 100644
--- a/src/amd/common/ac_shadowed_regs.c
+++ b/src/amd/common/ac_shadowed_regs.c
@@ -4288,6 +4288,8 @@ void ac_create_shadowing_ib_preamble(const struct 
radeon_info *info,
                CC1_SHADOW_GFX_SH_REGS(1) |
                CC1_SHADOW_GLOBAL_UCONFIG(1));
 
-   for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++)
-      ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address);
+   if (!info->has_fw_based_shadowing) {
+      for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++)
+         ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address);
+   }
 }
diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c 
b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c
index ecdbc5ec71c..e584186851a 100644
--- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c
+++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c
@@ -41,22 +41,43 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
    if (sctx->has_graphics &&
        (sctx->screen->info.mid_command_buffer_preemption_enabled ||
         sctx->screen->debug_flags & DBG(SHADOW_REGS))) {
-      sctx->shadowed_regs =
-            si_aligned_buffer_create(sctx->b.screen,
-                                     PIPE_RESOURCE_FLAG_UNMAPPABLE | 
SI_RESOURCE_FLAG_DRIVER_INTERNAL,
-                                     PIPE_USAGE_DEFAULT,
-                                     SI_SHADOWED_REG_BUFFER_SIZE,
-                                     4096);
-      if (!sctx->shadowed_regs)
-         fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n");
+      if (sctx->screen->info.has_fw_based_shadowing) {
+         sctx->shadowing.registers =
+               si_aligned_buffer_create(sctx->b.screen,
+                                        PIPE_RESOURCE_FLAG_UNMAPPABLE | 
SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                        PIPE_USAGE_DEFAULT,
+                                        
sctx->screen->info.fw_based_mcbp.shadow_size,
+                                        
sctx->screen->info.fw_based_mcbp.shadow_alignment);
+         sctx->shadowing.csa =
+               si_aligned_buffer_create(sctx->b.screen,
+                                        PIPE_RESOURCE_FLAG_UNMAPPABLE | 
SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                        PIPE_USAGE_DEFAULT,
+                                        
sctx->screen->info.fw_based_mcbp.csa_size,
+                                        
sctx->screen->info.fw_based_mcbp.csa_alignment);
+         if (!sctx->shadowing.registers || !sctx->shadowing.csa)
+            fprintf(stderr, "radeonsi: cannot create register shadowing buffer(s)\n");
+         else
+            sctx->ws->cs_set_mcbp_reg_shadowing_va(&sctx->gfx_cs,
+                                                   
sctx->shadowing.registers->gpu_address,
+                                                   
sctx->shadowing.csa->gpu_address);
+      } else {
+         sctx->shadowing.registers =
+               si_aligned_buffer_create(sctx->b.screen,
+                                        PIPE_RESOURCE_FLAG_UNMAPPABLE | 
SI_RESOURCE_FLAG_DRIVER_INTERNAL,
+                                        PIPE_USAGE_DEFAULT,
+                                        SI_SHADOWED_REG_BUFFER_SIZE,
+                                        4096);
+         if (!sctx->shadowing.registers)
+            fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n");
+      }
    }
 
-   si_init_cs_preamble_state(sctx, sctx->shadowed_regs != NULL);
+   si_init_cs_preamble_state(sctx, sctx->shadowing.registers != NULL);
 
-   if (sctx->shadowed_regs) {
+   if (sctx->shadowing.registers) {
       /* We need to clear the shadowed reg buffer. */
-      si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowed_regs->b.b,
-                             0, sctx->shadowed_regs->bo_size, 0, 
SI_OP_SYNC_AFTER,
+      si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, 
&sctx->shadowing.registers->b.b,
+                             0, sctx->shadowing.registers->bo_size, 0, 
SI_OP_SYNC_AFTER,
                              SI_COHERENCY_CP, L2_BYPASS);
 
       /* Create the shadowing preamble. */
@@ -72,11 +93,14 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
 
       ac_create_shadowing_ib_preamble(&sctx->screen->info,
                                       (pm4_cmd_add_fn)si_pm4_cmd_add, 
shadowing_preamble,
-                                      sctx->shadowed_regs->gpu_address, 
sctx->screen->dpbb_allowed);
+                                      sctx->shadowing.registers->gpu_address, 
sctx->screen->dpbb_allowed);
 
       /* Initialize shadowed registers as follows. */
-      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowed_regs,
+      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.registers,
                                 RADEON_USAGE_READWRITE | 
RADEON_PRIO_DESCRIPTORS);
+      if (sctx->shadowing.csa)
+         radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.csa,
+                                   RADEON_USAGE_READWRITE | 
RADEON_PRIO_DESCRIPTORS);
       si_pm4_emit(sctx, shadowing_preamble);
       ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, 
si_set_context_reg_array);
       si_pm4_emit(sctx, sctx->cs_preamble_state);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 8116d33c2e9..5eb216141ce 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2184,7 +2184,7 @@ static void si_emit_global_shader_pointers(struct 
si_context *sctx, struct si_de
       radeon_emit_one_32bit_pointer(sctx, descs, 
R_00B130_SPI_SHADER_USER_DATA_VS_0);
       radeon_emit_one_32bit_pointer(sctx, descs, 
R_00B230_SPI_SHADER_USER_DATA_GS_0);
       radeon_emit_one_32bit_pointer(sctx, descs, 
R_00B430_SPI_SHADER_USER_DATA_HS_0);
-   } else if (sctx->gfx_level == GFX9 && sctx->shadowed_regs) {
+   } else if (sctx->gfx_level == GFX9 && sctx->shadowing.registers) {
       /* We can't use the COMMON registers with register shadowing. */
       radeon_emit_one_32bit_pointer(sctx, descs, 
R_00B030_SPI_SHADER_USER_DATA_PS_0);
       radeon_emit_one_32bit_pointer(sctx, descs, 
R_00B130_SPI_SHADER_USER_DATA_VS_0);
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c 
b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index e6e94ce64d0..be2d353169f 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -417,9 +417,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool 
first_cs)
       radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->border_color_buffer,
                                 RADEON_USAGE_READ | RADEON_PRIO_BORDER_COLORS);
    }
-   if (ctx->shadowed_regs) {
-      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowed_regs,
+   if (ctx->shadowing.registers) {
+      radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.registers,
                                 RADEON_USAGE_READWRITE | 
RADEON_PRIO_DESCRIPTORS);
+
+      if (ctx->shadowing.csa)
+         radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.csa,
+                                   RADEON_USAGE_READWRITE | 
RADEON_PRIO_DESCRIPTORS);
    }
 
    si_add_all_descriptors_to_bo_list(ctx);
@@ -484,7 +488,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool 
first_cs)
    if (ctx->screen->use_ngg_culling)
       si_mark_atom_dirty(ctx, &ctx->atoms.s.ngg_cull_state);
 
-   if (first_cs || !ctx->shadowed_regs) {
+   if (first_cs || !ctx->shadowing.registers) {
       /* These don't add any buffers, so skip them with shadowing. */
       si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs);
       /* CLEAR_STATE sets zeros. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 08fe4583c8e..ab42506cda4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -350,7 +350,8 @@ static void si_destroy_context(struct pipe_context *context)
    sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
    si_resource_reference(&sctx->eop_bug_scratch, NULL);
    si_resource_reference(&sctx->eop_bug_scratch_tmz, NULL);
-   si_resource_reference(&sctx->shadowed_regs, NULL);
+   si_resource_reference(&sctx->shadowing.registers, NULL);
+   si_resource_reference(&sctx->shadowing.csa, NULL);
 
    si_destroy_compiler(&sctx->compiler);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 282135bad16..e127e0ac0c9 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -962,7 +962,14 @@ struct si_context {
    struct u_log_context *log;
    void *query_result_shader;
    void *sh_query_result_shader;
-   struct si_resource *shadowed_regs;
+   struct {
+      /* Memory where the shadowed registers will be saved and loaded from. */
+      struct si_resource *registers;
+      /* Context Save Area: scratch area to save other required data. Only
+       * used if info->has_fw_based_shadowing is true.
+       */
+      struct si_resource *csa;
+   } shadowing;
 
    void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp 
b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 71196c0a781..8bd203aa498 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -1471,7 +1471,7 @@ static void si_emit_draw_packets(struct si_context *sctx, 
const struct pipe_draw
    /* draw packet */
    if (index_size) {
       /* Register shadowing doesn't shadow INDEX_TYPE. */
-      if (index_size != sctx->last_index_size || sctx->shadowed_regs ||
+      if (index_size != sctx->last_index_size || sctx->shadowing.registers ||
           (GFX_VERSION == GFX10_3 && disable_instance_packing != 
sctx->disable_instance_packing)) {
          unsigned index_type;
 
@@ -1598,7 +1598,7 @@ static void si_emit_draw_packets(struct si_context *sctx, 
const struct pipe_draw
       }
    } else {
       /* Register shadowing requires that we always emit PKT3_NUM_INSTANCES. */
-      if (sctx->shadowed_regs ||
+      if (sctx->shadowing.registers ||
           sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
           sctx->last_instance_count != instance_count) {
          radeon_emit(PKT3(PKT3_NUM_INSTANCES, 0, 0));
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp 
b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 63f23ff092a..ec58fa28c17 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -3693,7 +3693,7 @@ static void si_cs_preamble_add_vgt_flush(struct 
si_context *sctx, bool tmz)
                                &sctx->cs_preamble_has_vgt_flush;
 
    /* We shouldn't get here if registers are shadowed. */
-   assert(!sctx->shadowed_regs);
+   assert(!sctx->shadowing.registers);
 
    if (*has_vgt_flush)
       return;
@@ -3810,7 +3810,7 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
                          false, 0, 0, 0);
    }
 
-   if (sctx->shadowed_regs) {
+   if (sctx->shadowing.registers) {
       /* These registers will be shadowed, so set them only once. */
       struct radeon_cmdbuf *cs = &sctx->gfx_cs;
 
@@ -4080,7 +4080,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
 
    assert((tf_ring_size_field & C_030938_SIZE) == 0);
 
-   if (sctx->shadowed_regs) {
+   if (sctx->shadowing.registers) {
       /* These registers will be shadowed, so set them only once. */
       /* TODO: tmz + shadowed_regs support */
       struct radeon_cmdbuf *cs = &sctx->gfx_cs;
diff --git a/src/gallium/include/winsys/radeon_winsys.h 
b/src/gallium/include/winsys/radeon_winsys.h
index ad5810872d0..46b9c96d844 100644
--- a/src/gallium/include/winsys/radeon_winsys.h
+++ b/src/gallium/include/winsys/radeon_winsys.h
@@ -751,6 +751,12 @@ struct radeon_winsys {
     * Stable pstate
     */
    bool (*cs_set_pstate)(struct radeon_cmdbuf *cs, enum radeon_ctx_pstate 
state);
+
+   /**
+    * Pass the VAs to the buffers where various information is saved by the FW 
during mcbp.
+    */
+   void (*cs_set_mcbp_reg_shadowing_va)(struct radeon_cmdbuf *cs, uint64_t 
regs_va,
+                                                                  uint64_t 
csa_va);
 };
 
 static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 8916002cad4..b7e1b9c02c5 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1490,7 +1490,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, 
int thread_index)
    if (acs->ip_type == AMD_IP_GFX)
       ws->gfx_bo_list_counter += cs->num_real_buffers;
 
-   struct drm_amdgpu_cs_chunk chunks[7];
+   struct drm_amdgpu_cs_chunk chunks[8];
    unsigned num_chunks = 0;
 
    /* BO list */
@@ -1565,6 +1565,13 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, 
int thread_index)
       num_chunks++;
    }
 
+   if (ws->info.has_fw_based_shadowing) {
+      chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_CP_GFX_SHADOW;
+      chunks[num_chunks].length_dw = sizeof(struct 
drm_amdgpu_cs_chunk_cp_gfx_shadow) / 4;
+      chunks[num_chunks].chunk_data = (uintptr_t)&acs->mcbp_fw_shadow_chunk;
+      num_chunks++;
+   }
+
    /* Fence */
    if (has_user_fence) {
       chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
@@ -1674,6 +1681,9 @@ cleanup:
    if (r || noop)
       amdgpu_fence_signalled(cs->fence);
 
+   if (unlikely(ws->info.has_fw_based_shadowing && 
acs->mcbp_fw_shadow_chunk.flags && r == 0))
+      acs->mcbp_fw_shadow_chunk.flags = 0;
+
    cs->error_code = r;
 
    /* Only decrement num_active_ioctls for those buffers where we incremented 
it. */
@@ -1855,6 +1865,16 @@ static bool amdgpu_bo_is_referenced(struct radeon_cmdbuf 
*rcs,
    return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage);
 }
 
+static void amdgpu_cs_set_mcbp_reg_shadowing_va(struct radeon_cmdbuf 
*rcs,uint64_t regs_va,
+                                                                   uint64_t 
csa_va)
+{
+   struct amdgpu_cs *cs = amdgpu_cs(rcs);
+   cs->mcbp_fw_shadow_chunk.shadow_va = regs_va;
+   cs->mcbp_fw_shadow_chunk.csa_va = csa_va;
+   cs->mcbp_fw_shadow_chunk.gds_va = 0;
+   cs->mcbp_fw_shadow_chunk.flags = 
AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+}
+
 void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
 {
    ws->base.ctx_create = amdgpu_ctx_create;
@@ -1880,4 +1900,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys 
*ws)
    ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file;
    ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file;
    ws->base.export_signalled_sync_file = amdgpu_export_signalled_sync_file;
+
+   if (ws->aws->info.has_fw_based_shadowing)
+      ws->base.cs_set_mcbp_reg_shadowing_va = 
amdgpu_cs_set_mcbp_reg_shadowing_va;
 }
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 13b8bf73d4f..5038463db40 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -161,6 +161,8 @@ struct amdgpu_cs {
    struct util_queue_fence flush_completed;
    struct pipe_fence_handle *next_fence;
    struct pb_buffer *preamble_ib_bo;
+
+   struct drm_amdgpu_cs_chunk_cp_gfx_shadow mcbp_fw_shadow_chunk;
 };
 
 struct amdgpu_fence {

Reply via email to