Module: Mesa
Branch: main
Commit: 748b7f80ef1cf6a3fed9991d70230e69fef51a0e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=748b7f80ef1cf6a3fed9991d70230e69fef51a0e

Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
Date:   Wed Jun  8 02:18:37 2022 +0200

radv: Move sparse binding into a dedicated queue.

1) This better reflects the reality that we only have one timeline
   of sparse binding changes.

2) Allows making it a threaded queue from the start, in preparation
   for the explicit sync work.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16935>

---

 src/amd/vulkan/layers/radv_sqtt_layer.c |  2 +-
 src/amd/vulkan/radv_physical_device.c   | 23 ++++++++++++----
 src/amd/vulkan/radv_queue.c             | 48 +++++++++++++++++++++++++++++----
 3 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index bcdf55fb21a..1ec1383eb2f 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -333,7 +333,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer 
*cmd_buffer)
    marker.device_id_low = device_id;
    marker.device_id_high = device_id >> 32;
    marker.queue = cmd_buffer->qf;
-   marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | 
VK_QUEUE_SPARSE_BINDING_BIT;
+   marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
 
    if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
       marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c
index ae41d17475e..14f26b8f2e9 100644
--- a/src/amd/vulkan/radv_physical_device.c
+++ b/src/amd/vulkan/radv_physical_device.c
@@ -173,6 +173,9 @@ radv_physical_device_init_queue_table(struct 
radv_physical_device *pdevice)
          idx++;
       }
    }
+
+   pdevice->vk_queue_to_radv[idx++] = RADV_QUEUE_SPARSE;
+
    pdevice->num_queues = idx;
 }
 
@@ -2064,7 +2067,7 @@ static void
 radv_get_physical_device_queue_family_properties(struct radv_physical_device 
*pdevice, uint32_t *pCount,
                                                  VkQueueFamilyProperties 
**pQueueFamilyProperties)
 {
-   int num_queue_families = 1;
+   int num_queue_families = 2;
    int idx;
    if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
@@ -2086,8 +2089,7 @@ radv_get_physical_device_queue_family_properties(struct 
radv_physical_device *pd
    idx = 0;
    if (*pCount >= 1) {
       *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
-         .queueFlags =
-            VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | 
VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
+         .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | 
VK_QUEUE_TRANSFER_BIT,
          .queueCount = 1,
          .timestampValidBits = 64,
          .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
@@ -2099,7 +2101,7 @@ radv_get_physical_device_queue_family_properties(struct 
radv_physical_device *pd
        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
       if (*pCount > idx) {
          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
-            .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | 
VK_QUEUE_SPARSE_BINDING_BIT,
+            .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
             .queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues,
             .timestampValidBits = 64,
             .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
@@ -2108,6 +2110,16 @@ radv_get_physical_device_queue_family_properties(struct 
radv_physical_device *pd
       }
    }
 
+   if (*pCount > idx) {
+      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
+         .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
+         .queueCount = 1,
+         .timestampValidBits = 64,
+         .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
+      };
+      idx++;
+   }
+
    if (pdevice->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
       if (pdevice->rad_info.ip[pdevice->vid_decode_ip].num_queues > 0) {
          if (*pCount > idx) {
@@ -2145,9 +2157,10 @@ 
radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, ui
       &pQueueFamilyProperties[0].queueFamilyProperties,
       &pQueueFamilyProperties[1].queueFamilyProperties,
       &pQueueFamilyProperties[2].queueFamilyProperties,
+      &pQueueFamilyProperties[3].queueFamilyProperties,
    };
    radv_get_physical_device_queue_family_properties(pdevice, pCount, 
properties);
-   assert(*pCount <= 3);
+   assert(*pCount <= 4);
 
    for (uint32_t i = 0; i < *pCount; i++) {
       vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {
diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c
index abdfae500fe..85aa03c2125 100644
--- a/src/amd/vulkan/radv_queue.c
+++ b/src/amd/vulkan/radv_queue.c
@@ -1683,17 +1683,51 @@ fail:
 }
 
 static VkResult
-radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
+radv_queue_sparse_submit(struct vk_queue *vqueue, struct vk_queue_submit 
*submission)
 {
    struct radv_queue *queue = (struct radv_queue *)vqueue;
+   struct radv_device *device = queue->device;
    VkResult result;
 
-   radv_rmv_log_submit(queue->device, radv_queue_ring(queue));
+   result = radv_queue_submit_bind_sparse_memory(device, submission);
+   if (result != VK_SUCCESS)
+      goto fail;
 
-   result = radv_queue_submit_bind_sparse_memory(queue->device, submission);
+   /* We do a CPU wait here, in part to avoid more winsys mechanisms. In the likely kernel explicit
+    * sync mechanism, we'd need to do a CPU wait anyway. Haven't seen this be a perf issue yet, but
+    * we have to make sure the queue always has its submission thread enabled. */
+   result = vk_sync_wait_many(&device->vk, submission->wait_count, 
submission->waits, 0, UINT64_MAX);
    if (result != VK_SUCCESS)
       goto fail;
 
+   /* Ignore all the commandbuffers. They're necessarily empty anyway. */
+
+   for (unsigned i = 0; i < submission->signal_count; ++i) {
+      result = vk_sync_signal(&device->vk, submission->signals[i].sync, 
submission->signals[i].signal_value);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
+fail:
+   if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
+      /* When something bad happened during the submission, such as
+       * an out of memory issue, it might be hard to recover from
+       * this inconsistent state. To avoid this sort of problem, we
+       * assume that we are in a really bad situation and return
+       * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
+       * to submit the same job again to this device.
+       */
+      result = vk_device_set_lost(&queue->device->vk, "vkQueueSubmit() 
failed");
+   }
+   return result;
+}
+
+static VkResult
+radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
+{
+   struct radv_queue *queue = (struct radv_queue *)vqueue;
+   VkResult result;
+
    if (!submission->command_buffer_count && !submission->wait_count && 
!submission->signal_count)
       return VK_SUCCESS;
 
@@ -1703,7 +1737,6 @@ radv_queue_submit(struct vk_queue *vqueue, struct 
vk_queue_submit *submission)
       result = radv_queue_submit_normal(queue, submission);
    }
 
-fail:
    if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
       /* When something bad happened during the submission, such as
        * an out of memory issue, it might be hard to recover from
@@ -1760,7 +1793,12 @@ radv_queue_init(struct radv_device *device, struct 
radv_queue *queue, int idx,
          goto fail;
    }
 
-   queue->vk.driver_submit = radv_queue_submit;
+   if (queue->state.qf == RADV_QUEUE_SPARSE) {
+      queue->vk.driver_submit = radv_queue_sparse_submit;
+      vk_queue_enable_submit_thread(&queue->vk);
+   } else {
+      queue->vk.driver_submit = radv_queue_submit;
+   }
    return VK_SUCCESS;
 fail:
    vk_queue_finish(&queue->vk);

Reply via email to