Module: Mesa
Branch: main
Commit: 945165acadb68d9e0301c037cd99c4060180822f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=945165acadb68d9e0301c037cd99c4060180822f

Author: Yiwei Zhang <zzyi...@chromium.org>
Date:   Wed Dec 27 16:57:25 2023 -0800

venus: TLS ring

This change adds a new venus feature: TLS ring
- co-owned by TLS and VkInstance
- initialized in TLS upon request
- teardown happens upon thread exit or instance destroy
- teardown is split into 2 stages (sketched below):
  1. one owner locks, destroys the ring and marks it destroyed
  2. the other owner locks and frees the tls ring storage
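
A minimal sketch of that two-stage protocol, using a hypothetical
shared_ring type in place of vn_tls_ring (the real implementation is
vn_tls_destroy_ring in the diff below); either owner may run first:

  #include <stdlib.h>
  #include <threads.h>

  struct shared_ring {
     mtx_t mutex;
     void *ring; /* non-NULL until the first owner tears it down */
  };

  /* one-time setup by whichever owner creates the shared ring */
  static struct shared_ring *
  shared_ring_create(void)
  {
     struct shared_ring *sr = calloc(1, sizeof(*sr));
     if (!sr)
        return NULL;
     mtx_init(&sr->mutex, mtx_plain);
     sr->ring = malloc(64); /* placeholder for a real ring buffer */
     if (!sr->ring) {
        mtx_destroy(&sr->mutex);
        free(sr);
        return NULL;
     }
     return sr;
  }

  /* called exactly once by each owner (thread exit and instance
   * destroy), in either order */
  static void
  shared_ring_release(struct shared_ring *sr)
  {
     mtx_lock(&sr->mutex);
     if (sr->ring) {
        /* stage 1: destroy the ring and mark it destroyed */
        free(sr->ring);
        sr->ring = NULL;
        mtx_unlock(&sr->mutex);
     } else {
        /* stage 2: the other owner already ran stage 1, so it is now
         * safe to free the shared storage itself */
        mtx_unlock(&sr->mutex);
        mtx_destroy(&sr->mutex);
        free(sr);
     }
  }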

TLS ring supersedes the prior secondary ring and enables multi-threaded
shader compilation, reducing the loading time of ROTTR from ~110s to
~21s (native is ~19s).

TLS ring is in fact a synchronous ring by design, and can be used to
redirect all existing synchronous submissions trivially: e.g. upon any
vn_call_*, request a TLS ring, wait for deps and then submit (see the
sketch below).
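
A sketch of that redirect pattern, assuming the helpers introduced in
this patch (vn_tls_get_ring, vn_ring_wait_all); vn_call_vkFoo is a
hypothetical stand-in for any generated vn_call_* helper:

  static VkResult
  vn_submit_foo_sync(struct vn_device *dev, VkDevice device)
  {
     /* request the per-thread ring; falls back to the primary ring
      * unless host allocation fails outright */
     struct vn_ring *ring = vn_tls_get_ring(dev->instance);
     if (!ring)
        return VK_ERROR_OUT_OF_HOST_MEMORY;

     /* deps may still be pending on the primary ring; wait for them
      * before submitting on a different ring */
     if (ring != dev->primary_ring)
        vn_ring_wait_all(dev->primary_ring);

     /* synchronous submission on the TLS ring */
     return vn_call_vkFoo(ring, device);
  }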

Signed-off-by: Yiwei Zhang <zzyi...@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26838>

---

 src/virtio/vulkan/vn_common.c   | 88 +++++++++++++++++++++++++++++++++++++++--
 src/virtio/vulkan/vn_common.h   | 24 +++++++++++
 src/virtio/vulkan/vn_instance.c |  6 +++
 src/virtio/vulkan/vn_instance.h |  1 +
 src/virtio/vulkan/vn_pipeline.c | 52 +++++++++++++-----------
 5 files changed, 144 insertions(+), 27 deletions(-)

diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c
index 95da1b3d2cf..73434071d47 100644
--- a/src/virtio/vulkan/vn_common.c
+++ b/src/virtio/vulkan/vn_common.c
@@ -241,9 +241,81 @@ vn_relax(struct vn_relax_state *state)
    os_time_sleep(base_sleep_us << shift);
 }
 
+struct vn_ring *
+vn_tls_get_ring(struct vn_instance *instance)
+{
+   if (VN_PERF(NO_MULTI_RING))
+      return instance->ring.ring;
+
+   struct vn_tls *tls = vn_tls_get();
+   if (unlikely(!tls)) {
+      /* only allow fallback on missing tls */
+      return instance->ring.ring;
+   }
+
+   /* look up tls_ring owned by instance */
+   list_for_each_entry(struct vn_tls_ring, tls_ring, &tls->tls_rings,
+                       tls_head) {
+      mtx_lock(&tls_ring->mutex);
+      if (tls_ring->instance == instance) {
+         mtx_unlock(&tls_ring->mutex);
+         assert(tls_ring->ring);
+         return tls_ring->ring;
+      }
+      mtx_unlock(&tls_ring->mutex);
+   }
+
+   struct vn_tls_ring *tls_ring = calloc(1, sizeof(*tls_ring));
+   if (!tls_ring)
+      return NULL;
+
+   /* keep the extra for potential roundtrip sync on tls ring */
+   static const size_t extra_size = sizeof(uint32_t);
+
+   /* only need a small ring for synchronous cmds on tls ring */
+   static const size_t buf_size = 16 * 1024;
+
+   struct vn_ring_layout layout;
+   vn_ring_get_layout(buf_size, extra_size, &layout);
+
+   tls_ring->ring = vn_ring_create(instance, &layout);
+   if (!tls_ring->ring) {
+      free(tls_ring);
+      return NULL;
+   }
+
+   mtx_init(&tls_ring->mutex, mtx_plain);
+   tls_ring->instance = instance;
+   list_add(&tls_ring->tls_head, &tls->tls_rings);
+   list_add(&tls_ring->vk_head, &instance->ring.tls_rings);
+
+   return tls_ring->ring;
+}
+
+void
+vn_tls_destroy_ring(struct vn_tls_ring *tls_ring)
+{
+   mtx_lock(&tls_ring->mutex);
+   if (tls_ring->ring) {
+      vn_ring_destroy(tls_ring->ring);
+      tls_ring->ring = NULL;
+      tls_ring->instance = NULL;
+      mtx_unlock(&tls_ring->mutex);
+   } else {
+      mtx_unlock(&tls_ring->mutex);
+      mtx_destroy(&tls_ring->mutex);
+      free(tls_ring);
+   }
+}
+
 static void
 vn_tls_free(void *tls)
 {
+   if (tls) {
+      list_for_each_entry_safe(struct vn_tls_ring, tls_ring,
+                               &((struct vn_tls *)tls)->tls_rings, tls_head)
+         vn_tls_destroy_ring(tls_ring);
+   }
    free(tls);
 }
 
@@ -271,9 +343,17 @@ vn_tls_get(void)
       return tls;
 
    tls = calloc(1, sizeof(*tls));
-   if (tls && tss_set(vn_tls_key, tls) == thrd_success)
-      return tls;
+   if (!tls)
+      return NULL;
 
-   free(tls);
-   return NULL;
+   /* initialize tls */
+   tls->async_pipeline_create = false;
+   list_inithead(&tls->tls_rings);
+
+   if (tss_set(vn_tls_key, tls) != thrd_success) {
+      free(tls);
+      return NULL;
+   }
+
+   return tls;
 }
diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h
index c802b28493e..215a7f97940 100644
--- a/src/virtio/vulkan/vn_common.h
+++ b/src/virtio/vulkan/vn_common.h
@@ -210,11 +210,29 @@ struct vn_relax_state {
    const char *reason;
 };
 
+/* TLS ring
+ * - co-owned by TLS and VkInstance
+ * - initialized in TLS upon request
+ * - teardown happens upon thread exit or instance destroy
+ * - teardown is split into 2 stages:
+ *   1. one owner locks, destroys the ring and marks it destroyed
+ *   2. the other owner locks and frees the tls ring storage
+ */
+struct vn_tls_ring {
+   mtx_t mutex;
+   struct vn_ring *ring;
+   struct vn_instance *instance;
+   struct list_head tls_head;
+   struct list_head vk_head;
+};
+
 struct vn_tls {
    /* Track the threads on which swapchain and command pool creations occur.
     * Pipeline create on those threads are forced async via the primary ring.
     */
    bool async_pipeline_create;
+   /* Track the TLS rings owned by this thread across instances. */
+   struct list_head tls_rings;
 };
 
 void
@@ -501,4 +519,10 @@ vn_tls_get_async_pipeline_create(void)
    return true;
 }
 
+struct vn_ring *
+vn_tls_get_ring(struct vn_instance *instance);
+
+void
+vn_tls_destroy_ring(struct vn_tls_ring *tls_ring);
+
 #endif /* VN_COMMON_H */
diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c
index 4870b05a4e0..06627aeeb5b 100644
--- a/src/virtio/vulkan/vn_instance.c
+++ b/src/virtio/vulkan/vn_instance.c
@@ -118,6 +118,10 @@ vn_instance_fini_ring(struct vn_instance *instance)
 
    vn_watchdog_fini(&instance->ring.watchdog);
 
+   list_for_each_entry_safe(struct vn_tls_ring, tls_ring,
+                            &instance->ring.tls_rings, vk_head)
+      vn_tls_destroy_ring(tls_ring);
+
    vn_ring_destroy(instance->ring.ring);
 }
 
@@ -133,6 +137,8 @@ vn_instance_init_ring(struct vn_instance *instance)
    if (!instance->ring.ring)
       return VK_ERROR_OUT_OF_HOST_MEMORY;
 
+   list_inithead(&instance->ring.tls_rings);
+
    vn_watchdog_init(&instance->ring.watchdog);
 
    mtx_init(&instance->ring.roundtrip_mutex, mtx_plain);
diff --git a/src/virtio/vulkan/vn_instance.h b/src/virtio/vulkan/vn_instance.h
index 57095e07f04..9b611d20e43 100644
--- a/src/virtio/vulkan/vn_instance.h
+++ b/src/virtio/vulkan/vn_instance.h
@@ -49,6 +49,7 @@ struct vn_instance {
 
    struct {
       struct vn_ring *ring;
+      struct list_head tls_rings;
 
       /* to synchronize renderer/ring */
       mtx_t roundtrip_mutex;
diff --git a/src/virtio/vulkan/vn_pipeline.c b/src/virtio/vulkan/vn_pipeline.c
index 375b47ef0f1..dd97c49a6cd 100644
--- a/src/virtio/vulkan/vn_pipeline.c
+++ b/src/virtio/vulkan/vn_pipeline.c
@@ -449,29 +449,25 @@ vn_DestroyPipelineCache(VkDevice device,
 static struct vn_ring *
 vn_get_target_ring(struct vn_device *dev)
 {
-   if (VN_PERF(NO_MULTI_RING))
-      return dev->primary_ring;
-
    if (vn_tls_get_async_pipeline_create())
       return dev->primary_ring;
 
-   if (!dev->secondary_ring) {
-      if (!vn_device_secondary_ring_init_once(dev)) {
-         /* fallback to primary ring submission */
-         return dev->primary_ring;
-      }
-   }
+   struct vn_ring *ring = vn_tls_get_ring(dev->instance);
+   if (!ring)
+      return NULL;
 
-   /* Ensure pipeline cache and pipeline deps are ready in the renderer.
-    *
-    * TODO:
-    * - For cache retrieval, track ring seqno of cache obj and only wait
-    *   for that seqno once.
-    * - For pipeline creation, track ring seqnos of pipeline layout and
-    *   renderpass objs it depends on, and only wait for those seqnos once.
-    */
-   vn_ring_wait_all(dev->primary_ring);
-   return dev->secondary_ring;
+   if (ring != dev->primary_ring) {
+      /* Ensure pipeline create dependencies are ready on the renderer side.
+       *
+       * TODO:
+       * - For pipeline objects, avoid object id re-use between async pipeline
+       *   destroy on the primary ring and sync pipeline create on TLS ring.
+       * - For pipeline create, track ring seqnos of layout and renderpass
+       *   objects it depends on, and only wait for those seqnos once.
+       */
+      vn_ring_wait_all(dev->primary_ring);
+   }
+   return ring;
 }
 
 VkResult
@@ -1546,8 +1542,14 @@ vn_CreateGraphicsPipelines(VkDevice device,
    }
 
    struct vn_ring *target_ring = vn_get_target_ring(dev);
-   assert(target_ring);
-   if (want_sync || target_ring == dev->secondary_ring) {
+   if (!target_ring) {
+      vk_free(alloc, fix_tmp);
+      vn_destroy_failed_pipelines(dev, createInfoCount, pPipelines, alloc);
+      STACK_ARRAY_FINISH(fix_descs);
+      return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   if (want_sync || target_ring != dev->primary_ring) {
       result = vn_call_vkCreateGraphicsPipelines(
          target_ring, device, pipelineCache, createInfoCount, pCreateInfos,
          NULL, pPipelines);
@@ -1602,8 +1604,12 @@ vn_CreateComputePipelines(VkDevice device,
    }
 
    struct vn_ring *target_ring = vn_get_target_ring(dev);
-   assert(target_ring);
-   if (want_sync || target_ring == dev->secondary_ring) {
+   if (!target_ring) {
+      vn_destroy_failed_pipelines(dev, createInfoCount, pPipelines, alloc);
+      return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   if (want_sync || target_ring != dev->primary_ring) {
       result = vn_call_vkCreateComputePipelines(
          target_ring, device, pipelineCache, createInfoCount, pCreateInfos,
          NULL, pPipelines);
