Mesa (main): radv: implement graphics shaders relocation for a RGP workaround

GitLab Mirror Fri, 10 Feb 2023 06:03:44 -0800

Module: Mesa
Branch: main
Commit: bd30f3619d573fd9ca357856314b06dd6d615d90
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=bd30f3619d573fd9ca357856314b06dd6d615d90


Author: Samuel Pitoiset <[email protected]>
Date:   Thu Feb  2 16:52:15 2023 +0100

radv: implement graphics shaders relocation for a RGP workaround

RGP requires shaders to be uploaded consecutively inside the same
buffer object. Otherwise, either it makes the driver generating
huge traces (ie. in GiB) or it fails to load traces at all. Hopefully,
this will be improved soon when AMDGPU drivers will have GPL support.

To workaround this, the driver relocates graphics shaders in the same
buffer object when a pipeline is created. Then at draw time, it
overwrites SPI_SHADER_PGM_xxx registers to make sure SQTT can match
between emitted and exported shaders. It's a bit suboptimal because
graphics shaders are uploaded twice but it's the best solution I found.

This will allow to implement GPL caching without breaking capturing
shaders with RGP.

Signed-off-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21078>

---

 src/amd/vulkan/layers/radv_sqtt_layer.c | 179 +++++++++++++++++++++++++++++++-
 src/amd/vulkan/radv_cmd_buffer.c        |   5 +
 src/amd/vulkan/radv_private.h           |  12 +++
 3 files changed, 194 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c 
b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 10cef4c60dc..b3130d1e5a5 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -23,12 +23,175 @@
 
 #include "vk_common_entrypoints.h"
 #include "wsi_common_entrypoints.h"
+#include "radv_cs.h"
 #include "radv_private.h"
 #include "radv_shader.h"
 
 #include "ac_rgp.h"
 #include "ac_sqtt.h"
 
+void
+radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer,
+                                 struct radv_graphics_pipeline *pipeline)
+{
+   const enum amd_gfx_level gfx_level = 
cmd_buffer->device->physical_device->rad_info.gfx_level;
+   struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
+   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   uint64_t va;
+
+   radv_cs_add_buffer(cmd_buffer->device->ws, cs, reloc->bo);
+
+   /* VS */
+   if (pipeline->base.shaders[MESA_SHADER_VERTEX]) {
+      struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_VERTEX];
+
+      va = reloc->va[MESA_SHADER_VERTEX];
+      if (vs->info.vs.as_ls) {
+         radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
+      } else if (vs->info.vs.as_es) {
+         radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
+         radeon_emit(cs, va >> 8);
+         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+      } else if (vs->info.is_ngg) {
+         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+      } else {
+         radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
+         radeon_emit(cs, va >> 8);
+         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
+      }
+   }
+
+   /* TCS */
+   if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
+      va = reloc->va[MESA_SHADER_TESS_CTRL];
+
+      if (gfx_level >= GFX9) {
+         if (gfx_level >= GFX10) {
+            radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
+         } else {
+            radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
+         }
+      } else {
+         radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 2);
+         radeon_emit(cs, va >> 8);
+         radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
+      }
+   }
+
+   /* TES */
+   if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) {
+      struct radv_shader *tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL];
+
+      va = reloc->va[MESA_SHADER_TESS_EVAL];
+      if (tes->info.is_ngg) {
+         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+      } else if (tes->info.tes.as_es) {
+         radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
+         radeon_emit(cs, va >> 8);
+         radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+      } else {
+         radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
+         radeon_emit(cs, va >> 8);
+         radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
+      }
+   }
+
+   /* GS */
+   if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) {
+      struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY];
+
+      va = reloc->va[MESA_SHADER_GEOMETRY];
+      if (gs->info.is_ngg) {
+         radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+      } else {
+         if (gfx_level >= GFX9) {
+            if (gfx_level >= GFX10) {
+               radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
+            } else {
+               radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
+            }
+         } else {
+            radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 2);
+            radeon_emit(cs, va >> 8);
+            radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
+         }
+      }
+   }
+
+   /* FS */
+   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
+      va = reloc->va[MESA_SHADER_FRAGMENT];
+
+      radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 2);
+      radeon_emit(cs, va >> 8);
+      radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
+   }
+}
+
+static uint64_t
+radv_sqtt_shader_get_va_reloc(struct radv_pipeline *pipeline, gl_shader_stage 
stage)
+{
+   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
+      struct radv_graphics_pipeline *graphics_pipeline = 
radv_pipeline_to_graphics(pipeline);
+      struct radv_sqtt_shaders_reloc *reloc = 
graphics_pipeline->sqtt_shaders_reloc;
+      return reloc->va[stage];
+   }
+
+   return radv_shader_get_va(pipeline->shaders[stage]);
+}
+
+static VkResult
+radv_sqtt_reloc_graphics_shaders(struct radv_device *device,
+                                 struct radv_graphics_pipeline *pipeline)
+{
+   struct radv_sqtt_shaders_reloc *reloc;
+   uint32_t code_size = 0;
+
+   reloc = calloc(1, sizeof(*reloc));
+   if (!reloc)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   /* Compute the total code size. */
+   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
+      const struct radv_shader *shader = pipeline->base.shaders[i];
+      if (!shader)
+         continue;
+
+      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
+   }
+
+   /* Allocate memory for all shader binaries. */
+   reloc->alloc = radv_alloc_shader_memory(device, code_size, pipeline);
+   if (!reloc->alloc) {
+      free(reloc);
+      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+   }
+
+   reloc->bo = reloc->alloc->arena->bo;
+
+   /* Relocate shader binaries to be contiguous in memory as requested by RGP. 
*/
+   uint64_t slab_va = radv_buffer_get_va(reloc->bo);
+   uint32_t slab_offset = reloc->alloc->offset;
+   char *slab_ptr = reloc->alloc->arena->ptr;
+
+   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
+      const struct radv_shader *shader = pipeline->base.shaders[i];
+      if (!shader)
+         continue;
+
+      reloc->va[i] = slab_va + slab_offset;
+
+      void *dest_ptr = slab_ptr + slab_offset;
+      memcpy(dest_ptr, shader->code_ptr, shader->code_size);
+
+      slab_offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
+   }
+
+   pipeline->sqtt_shaders_reloc = reloc;
+
+   return VK_SUCCESS;
+}
+
 static void
 radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
                                     enum rgp_sqtt_marker_general_api_type 
api_type)
@@ -977,7 +1140,7 @@ radv_add_code_object(struct radv_device *device, struct 
radv_pipeline *pipeline)
       }
       memcpy(code, shader->code_ptr, shader->code_size);
 
-      va = radv_shader_get_va(shader);
+      va = radv_sqtt_shader_get_va_reloc(pipeline, i);
 
       record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;
       record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;
@@ -1022,7 +1185,7 @@ radv_register_pipeline(struct radv_device *device, struct 
radv_pipeline *pipelin
       if (!shader)
          continue;
 
-      va = radv_shader_get_va(shader);
+      va = radv_sqtt_shader_get_va_reloc(pipeline, i);
       base_va = MIN2(base_va, va);
    }
 
@@ -1115,6 +1278,10 @@ sqtt_CreateGraphicsPipelines(VkDevice _device, 
VkPipelineCache pipelineCache, ui
       if (!pipeline)
          continue;
 
+      result = radv_sqtt_reloc_graphics_shaders(device, 
radv_pipeline_to_graphics(pipeline));
+      if (result != VK_SUCCESS)
+         goto fail;
+
       result = radv_register_pipeline(device, pipeline);
       if (result != VK_SUCCESS)
          goto fail;
@@ -1211,6 +1378,14 @@ sqtt_DestroyPipeline(VkDevice _device, VkPipeline 
_pipeline,
 
    radv_unregister_pipeline(device, pipeline);
 
+   if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
+      struct radv_graphics_pipeline *graphics_pipeline = 
radv_pipeline_to_graphics(pipeline);
+      struct radv_sqtt_shaders_reloc *reloc = 
graphics_pipeline->sqtt_shaders_reloc;
+
+      radv_free_shader_memory(device, reloc->alloc);
+      free(reloc);
+   }
+
    device->layer_dispatch.rgp.DestroyPipeline(_device, _pipeline, pAllocator);
 }
 
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 498a773db64..9a8b8835a42 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1898,6 +1898,11 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer 
*cmd_buffer)
       }
    }
 
+   if (pipeline->sqtt_shaders_reloc) {
+      /* Emit shaders relocation because RGP requires them to be contiguous in 
memory. */
+      radv_sqtt_emit_relocated_shaders(cmd_buffer, pipeline);
+   }
+
    for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
       struct radv_shader *shader = pipeline->base.shaders[s];
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 442ff6594e8..cc55935cf6c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2101,6 +2101,12 @@ struct radv_pipeline {
    uint32_t dynamic_offset_count;
 };
 
+struct radv_sqtt_shaders_reloc {
+   struct radeon_winsys_bo *bo;
+   union radv_shader_arena_block *alloc;
+   uint64_t va[MESA_VULKAN_SHADER_STAGES];
+};
+
 struct radv_graphics_pipeline {
    struct radv_pipeline base;
 
@@ -2176,6 +2182,9 @@ struct radv_graphics_pipeline {
    struct {
       nir_shader *nir;
    } retained_shaders[MESA_VULKAN_SHADER_STAGES];
+
+   /* For relocation of shaders with RGP. */
+   struct radv_sqtt_shaders_reloc *sqtt_shaders_reloc;
 };
 
 struct radv_compute_pipeline {
@@ -3069,6 +3078,9 @@ void radv_describe_barrier_end_delayed(struct 
radv_cmd_buffer *cmd_buffer);
 void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
                                      const struct radv_barrier_data *barrier);
 
+void radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer,
+                                      struct radv_graphics_pipeline *pipeline);
+
 struct radv_indirect_command_layout {
    struct vk_object_base base;

Mesa (main): radv: implement graphics shaders relocation for a RGP workaround

Reply via email to