Module: Mesa
Branch: main
Commit: 4a4b05869a6ad909417e30d46706ad6038084901
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4a4b05869a6ad909417e30d46706ad6038084901

Author: Ryan Neph <[email protected]>
Date:   Wed Mar 22 12:47:42 2023 -0700

venus: check and configure new ringMonitoring feature

At ring creation, if supported by renderer, we can request
ringMonitoring. During driver ring waits, the ring's new ALIVE status
bit will be checked periodically at the configured rate. If the bit is
not set, the renderer must have crashed and the driver should do the
same to signal a problem to the app/user.

Signed-off-by: Ryan Neph <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22036>

---

 src/virtio/vulkan/vn_common.c   | 57 +++++++++++++++++++++++++++++++++++++++++
 src/virtio/vulkan/vn_common.h   |  4 +++
 src/virtio/vulkan/vn_instance.c | 17 ++++++++++--
 src/virtio/vulkan/vn_ring.c     |  3 +++
 src/virtio/vulkan/vn_ring.h     | 16 ++++++++++++
 5 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c
index 115961fe065..73042de9274 100644
--- a/src/virtio/vulkan/vn_common.c
+++ b/src/virtio/vulkan/vn_common.c
@@ -123,9 +123,52 @@ vn_extension_get_spec_version(const char *name)
    return index >= 0 ? vn_info_extension_get(index)->spec_version : 0;
 }
 
+static bool
+vn_ring_monitor_acquire(struct vn_ring *ring)
+{
+   pid_t tid = gettid();
+   if (!ring->monitor.threadid && tid != ring->monitor.threadid &&
+       mtx_trylock(&ring->monitor.mutex) == thrd_success) {
+      /* register as the only waiting thread that monitors the ring. */
+      ring->monitor.threadid = tid;
+   }
+   return tid == ring->monitor.threadid;
+}
+
+void
+vn_ring_monitor_release(struct vn_ring *ring)
+{
+   if (gettid() != ring->monitor.threadid)
+      return;
+
+   ring->monitor.threadid = 0;
+   mtx_unlock(&ring->monitor.mutex);
+}
+
 struct vn_relax_state
 vn_relax_init(struct vn_ring *ring, const char *reason)
 {
+   if (ring->monitor.report_period_us) {
+#ifndef NDEBUG
+      /* ensure minimum check period is greater than maximum renderer
+       * reporting period (with margin of safety to ensure no false
+       * positives).
+       *
+       * first_warn_time is pre-calculated based on parameters in vn_relax
+       * and must update together.
+       */
+      const uint32_t first_warn_time = 3481600;
+      const uint32_t safety_margin = 250000;
+      assert(first_warn_time - safety_margin >=
+             ring->monitor.report_period_us);
+#endif
+
+      if (vn_ring_monitor_acquire(ring)) {
+         ring->monitor.alive = true;
+         vn_ring_unset_status_bits(ring, VK_RING_STATUS_ALIVE_BIT_MESA);
+      }
+   }
+
    return (struct vn_relax_state){
       .ring = ring,
       .iter = 0,
@@ -143,6 +186,7 @@ vn_relax(struct vn_relax_state *state)
    /* Yield for the first 2^busy_wait_order times and then sleep for
     * base_sleep_us microseconds for the same number of times.  After that,
     * keep doubling both sleep length and count.
+    * Must also update pre-calculated "first_warn_time" in vn_relax_init().
     */
    const uint32_t busy_wait_order = 8;
    const uint32_t base_sleep_us = vn_env.relax_base_sleep_us;
@@ -167,6 +211,19 @@ vn_relax(struct vn_relax_state *state)
          abort();
       }
 
+      if (ring->monitor.report_period_us) {
+         if (vn_ring_monitor_acquire(ring)) {
+            ring->monitor.alive = status & VK_RING_STATUS_ALIVE_BIT_MESA;
+            vn_ring_unset_status_bits(ring, VK_RING_STATUS_ALIVE_BIT_MESA);
+         }
+
+         if (!ring->monitor.alive) {
+            vn_log(NULL, "aborting on expired ring alive status at iter %d",
+                   *iter);
+            abort();
+         }
+      }
+
       if (*iter >= (1 << abort_order) && !VN_DEBUG(NO_ABORT)) {
          vn_log(NULL, "aborting");
          abort();
diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h
index 26477d22fb3..7ca5c5bb57c 100644
--- a/src/virtio/vulkan/vn_common.h
+++ b/src/virtio/vulkan/vn_common.h
@@ -229,6 +229,9 @@ vn_refcount_dec(struct vn_refcount *ref)
 uint32_t
 vn_extension_get_spec_version(const char *name);
 
+void
+vn_ring_monitor_release(struct vn_ring *ring);
+
 struct vn_relax_state
 vn_relax_init(struct vn_ring *ring, const char *reason);
 
@@ -238,6 +241,7 @@ vn_relax(struct vn_relax_state *state);
 static inline void
 vn_relax_fini(struct vn_relax_state *state)
 {
+   vn_ring_monitor_release(state->ring);
 }
 
 static_assert(sizeof(vn_object_id) >= sizeof(uintptr_t), "");
diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c
index 7b5b475c401..ad0b9bf3850 100644
--- a/src/virtio/vulkan/vn_instance.c
+++ b/src/virtio/vulkan/vn_instance.c
@@ -133,8 +133,19 @@ vn_instance_init_ring(struct vn_instance *instance)
 
    instance->ring.id = (uintptr_t)ring;
 
+   struct VkRingMonitorInfoMESA monitor_info;
+   if (instance->experimental.ringMonitoring) {
+      ring->monitor.report_period_us = 3000000;
+      mtx_init(&ring->monitor.mutex, mtx_plain);
+      monitor_info = (struct VkRingMonitorInfoMESA){
+         .sType = VK_STRUCTURE_TYPE_RING_MONITOR_INFO_MESA,
+         .maxReportingPeriodMicroseconds = ring->monitor.report_period_us,
+      };
+   }
+
    const struct VkRingCreateInfoMESA info = {
       .sType = VK_STRUCTURE_TYPE_RING_CREATE_INFO_MESA,
+      .pNext = instance->experimental.ringMonitoring ? &monitor_info : NULL,
       .resourceId = instance->ring.shmem->res_id,
       .size = layout.shmem_size,
       .idleTimeout = 50ull * 1000 * 1000,
@@ -231,12 +242,14 @@ vn_instance_init_experimental_features(struct vn_instance *instance)
              "\n\tglobalFencing = %u"
              "\n\tlargeRing = %u"
              "\n\tsyncFdFencing = %u"
-             "\n\tasyncRoundtrip = %u",
+             "\n\tasyncRoundtrip = %u"
+             "\n\tringMonitoring = %u",
              instance->experimental.memoryResourceAllocationSize,
              instance->experimental.globalFencing,
              instance->experimental.largeRing,
              instance->experimental.syncFdFencing,
-             instance->experimental.asyncRoundtrip);
+             instance->experimental.asyncRoundtrip,
+             instance->experimental.ringMonitoring);
    }
 
    return VK_SUCCESS;
diff --git a/src/virtio/vulkan/vn_ring.c b/src/virtio/vulkan/vn_ring.c
index 189fe4bc88f..715cbaa2ba3 100644
--- a/src/virtio/vulkan/vn_ring.c
+++ b/src/virtio/vulkan/vn_ring.c
@@ -208,6 +208,9 @@ vn_ring_fini(struct vn_ring *ring)
    list_for_each_entry_safe(struct vn_ring_submit, submit,
                             &ring->free_submits, head)
       free(submit);
+
+   if (ring->monitor.report_period_us)
+      mtx_destroy(&ring->monitor.mutex);
 }
 
 struct vn_ring_submit *
diff --git a/src/virtio/vulkan/vn_ring.h b/src/virtio/vulkan/vn_ring.h
index 69594504a8b..9e15a3da508 100644
--- a/src/virtio/vulkan/vn_ring.h
+++ b/src/virtio/vulkan/vn_ring.h
@@ -70,6 +70,22 @@ struct vn_ring {
 
    struct list_head submits;
    struct list_head free_submits;
+
+   /* Only one "waiting" thread may fulfill the "monitor" role at a time.
+    * Every "report_period_us" or longer, the waiting "monitor" thread tests
+    * the ring's ALIVE status, updates the "alive" atomic, and resets the
+    * ALIVE status for the next cycle. Waiting non-"monitor" threads, just
+    * check the "alive" atomic. The "monitor" role may be released and
+    * acquired by another waiting thread dynamically.
+    */
+   struct {
+      mtx_t mutex;
+      atomic_int threadid;
+      atomic_bool alive;
+
+      /* constant and non-zero after ring init, if monitoring is enabled */
+      uint32_t report_period_us;
+   } monitor;
 };
 
 void

Reply via email to