Module: Mesa
Branch: main
Commit: c8e122a73805924811df97071406c59ab6aee002
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c8e122a73805924811df97071406c59ab6aee002

Author: Kenneth Graunke <kenn...@whitecape.org>
Date:   Thu Feb  3 14:32:59 2022 -0800

anv: Implement rudimentary VK_AMD_buffer_marker support

This provides a basic implementation of VK_AMD_buffer_marker: we can
write the 32-bit markers from within a command buffer.  Unfortunately,
our hardware has several limitations that make this difficult to
implement well:

   1. We don't have insight into when specific stages finish (i.e.
      all geometry shaders are done, but pixel rasterization may
      still be occurring).

   2. We cannot perform pipelined writes of 32-bit values to arbitrary
      memory locations.  PIPE_CONTROL::Write Immediate Value would be
      the obvious way to implement this, but it only supports 64-bit
      values, and the extension doesn't allow us to do that.  We instead
      use MI_STORE_DATA_IMM to write 32-bit values, but this requires
      hard stalls.

Despite those limitations, the extension may still be useful for tools
to debug GPU hangs.  We hope to offer another extension in the future
which offers similar functionality but is more efficient on our GPUs.

v2: Updated by Lionel Landwerlin to fix a number of flushing and
    cache coherency issues with these writes.

Reviewed-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14924>

---

 src/intel/vulkan/anv_device.c      |  1 +
 src/intel/vulkan/genX_cmd_buffer.c | 39 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 9d52cf016e8..e42ed684f59 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -391,6 +391,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .EXT_vertex_attribute_divisor          = true,
       .EXT_vertex_input_dynamic_state        = true,
       .EXT_ycbcr_image_arrays                = true,
+      .AMD_buffer_marker                     = true,
 #ifdef ANDROID
       .ANDROID_external_memory_android_hardware_buffer = true,
       .ANDROID_native_buffer                 = true,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 6025c8acd40..3c0df045dc9 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -8543,3 +8543,42 @@ genX(write_trtt_entries)(struct anv_trtt_submission *submit)
 #endif
    return VK_SUCCESS;
 }
+
+void
+genX(CmdWriteBufferMarker2AMD)(VkCommandBuffer commandBuffer,
+                               VkPipelineStageFlags2KHR stage,
+                               VkBuffer dstBuffer,
+                               VkDeviceSize dstOffset,
+                               uint32_t marker)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+   ANV_FROM_HANDLE(anv_buffer, buffer, dstBuffer);
+
+   /* The barriers inserted by the application to make dstBuffer writable
+    * should already have the L1/L2 cache flushes. On platforms where the
+    * command streamer is not coherent with L3, we need an additional set of
+    * cache flushes.
+    */
+   enum anv_pipe_bits bits =
+#if GFX_VERx10 < 125
+      ANV_PIPE_DATA_CACHE_FLUSH_BIT |
+      ANV_PIPE_TILE_CACHE_FLUSH_BIT |
+#endif
+      ANV_PIPE_END_OF_PIPE_SYNC_BIT;
+
+   anv_add_pending_pipe_bits(cmd_buffer, bits, "write buffer marker");
+   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
+   struct mi_builder b;
+   mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
+
+   /* Emitting a PIPE_CONTROL with Post-Sync Op = Write Immediate Data
+    * would be the logical way to implement this extension, as it could
+    * do a pipelined marker write.  Unfortunately, it requires writing
+    * whole 64-bit QWords, and VK_AMD_buffer_marker requires writing a
+    * 32-bit value.  MI_STORE_DATA_IMM is the only good way to do that,
+    * and unfortunately it requires stalling.
+    */
+   mi_store(&b, mi_mem32(anv_address_add(buffer->address, dstOffset)),
+                mi_imm(marker));
+}

Reply via email to