u_queue: fix util_queue_finish deadlock by merging lock and finish_lock

GitLab Mirror Wed, 19 Jul 2023 13:30:13 -0700

Module: Mesa
Branch: staging/23.2
Commit: 131f7e185b9ace0f24b946b9d425dc0029a85951
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=131f7e185b9ace0f24b946b9d425dc0029a85951


Author: Marek Olšák <[email protected]>
Date:   Sun Jul 16 03:24:21 2023 -0400

util/u_queue: fix util_queue_finish deadlock by merging lock and finish_lock

and by disabling the on-demand thread creation, which breaks the finish logic.

Fixes: 3713dc6b2a7 - util/u_queue: add UTIL_QUEUE_INIT_SCALE_THREADS flag
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8363

Reviewed-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24173>
(cherry picked from commit bfdfe5aa82f349d055d2e69aaf1b46325a6772ca)

---

 .pick_status.json                               |   2 +-
 src/gallium/drivers/etnaviv/etnaviv_shader.c    |   3 +-
 src/gallium/drivers/freedreno/ir3/ir3_gallium.c |   3 +-
 src/gallium/drivers/iris/iris_program.c         |   3 +-
 src/gallium/drivers/radeonsi/si_pipe.c          |   2 +-
 src/gallium/drivers/zink/zink_screen.c          |   2 +-
 src/util/u_queue.c                              | 109 ++++++++++++++++--------
 src/util/u_queue.h                              |   4 +-
 8 files changed, 83 insertions(+), 45 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 760f98535d5..859bd1aebbe 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1364,7 +1364,7 @@
         "description": "util/u_queue: fix util_queue_finish deadlock by 
merging lock and finish_lock",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "3713dc6b2a7472a838885b9ff1e4e07f9b9b7713",
         "notes": null
diff --git a/src/gallium/drivers/etnaviv/etnaviv_shader.c 
b/src/gallium/drivers/etnaviv/etnaviv_shader.c
index 35abe703cbc..b483966bfff 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_shader.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_shader.c
@@ -558,7 +558,8 @@ etna_set_max_shader_compiler_threads(struct pipe_screen 
*pscreen,
 {
    struct etna_screen *screen = etna_screen(pscreen);
 
-   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
+   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
+                                 false);
 }
 
 static bool
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c 
b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index 6a6b061c8a3..743afc12993 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -530,7 +530,8 @@ ir3_set_max_shader_compiler_threads(struct pipe_screen 
*pscreen,
    /* This function doesn't allow a greater number of threads than
     * the queue had at its creation.
     */
-   util_queue_adjust_num_threads(&screen->compile_queue, max_threads);
+   util_queue_adjust_num_threads(&screen->compile_queue, max_threads,
+                                 false);
 }
 
 static bool
diff --git a/src/gallium/drivers/iris/iris_program.c 
b/src/gallium/drivers/iris/iris_program.c
index 1f2852ded5d..7645e44b774 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -2942,7 +2942,8 @@ iris_set_max_shader_compiler_threads(struct pipe_screen 
*pscreen,
                                      unsigned max_threads)
 {
    struct iris_screen *screen = (struct iris_screen *) pscreen;
-   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
+   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads,
+                                 false);
 }
 
 static bool
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index fb5c02c473b..91da22b5746 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1110,7 +1110,7 @@ static void si_set_max_shader_compiler_threads(struct 
pipe_screen *screen, unsig
 
    /* This function doesn't allow a greater number of threads than
     * the queue had at its creation. */
-   util_queue_adjust_num_threads(&sscreen->shader_compiler_queue, max_threads);
+   util_queue_adjust_num_threads(&sscreen->shader_compiler_queue, max_threads, 
false);
    /* Don't change the number of threads on the low priority queue. */
 }
 
diff --git a/src/gallium/drivers/zink/zink_screen.c 
b/src/gallium/drivers/zink/zink_screen.c
index 813c032a316..b5cdf13710b 100644
--- a/src/gallium/drivers/zink/zink_screen.c
+++ b/src/gallium/drivers/zink/zink_screen.c
@@ -196,7 +196,7 @@ static void
 zink_set_max_shader_compiler_threads(struct pipe_screen *pscreen, unsigned 
max_threads)
 {
    struct zink_screen *screen = zink_screen(pscreen);
-   util_queue_adjust_num_threads(&screen->cache_get_thread, max_threads);
+   util_queue_adjust_num_threads(&screen->cache_get_thread, max_threads, 
false);
 }
 
 static bool
diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 1009912897a..ed1e96b10f7 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -45,7 +45,7 @@
 
 static void
 util_queue_kill_threads(struct util_queue *queue, unsigned keep_num_threads,
-                        bool finish_locked);
+                        bool locked);
 
 /****************************************************************************
  * Wait for all queues to assert idle when exit() is called.
@@ -363,22 +363,27 @@ util_queue_create_thread(struct util_queue *queue, 
unsigned index)
 }
 
 void
-util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads)
+util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads,
+                              bool locked)
 {
    num_threads = MIN2(num_threads, queue->max_threads);
    num_threads = MAX2(num_threads, 1);
 
-   simple_mtx_lock(&queue->finish_lock);
+   if (!locked)
+      mtx_lock(&queue->lock);
+
    unsigned old_num_threads = queue->num_threads;
 
    if (num_threads == old_num_threads) {
-      simple_mtx_unlock(&queue->finish_lock);
+      if (!locked)
+         mtx_unlock(&queue->lock);
       return;
    }
 
    if (num_threads < old_num_threads) {
       util_queue_kill_threads(queue, num_threads, true);
-      simple_mtx_unlock(&queue->finish_lock);
+      if (!locked)
+         mtx_unlock(&queue->lock);
       return;
    }
 
@@ -394,7 +399,9 @@ util_queue_adjust_num_threads(struct util_queue *queue, 
unsigned num_threads)
          break;
       }
    }
-   simple_mtx_unlock(&queue->finish_lock);
+
+   if (!locked)
+      mtx_unlock(&queue->lock);
 }
 
 bool
@@ -442,7 +449,6 @@ util_queue_init(struct util_queue *queue,
    queue->global_data = global_data;
 
    (void) mtx_init(&queue->lock, mtx_plain);
-   (void) simple_mtx_init(&queue->finish_lock, mtx_plain);
 
    queue->num_queued = 0;
    cnd_init(&queue->has_queued_cond);
@@ -490,33 +496,37 @@ fail:
 
 static void
 util_queue_kill_threads(struct util_queue *queue, unsigned keep_num_threads,
-                        bool finish_locked)
+                        bool locked)
 {
-   unsigned i;
-
    /* Signal all threads to terminate. */
-   if (!finish_locked)
-      simple_mtx_lock(&queue->finish_lock);
+   if (!locked)
+      mtx_lock(&queue->lock);
 
    if (keep_num_threads >= queue->num_threads) {
-      simple_mtx_unlock(&queue->finish_lock);
+      if (!locked)
+         mtx_unlock(&queue->lock);
       return;
    }
 
-   mtx_lock(&queue->lock);
    unsigned old_num_threads = queue->num_threads;
    /* Setting num_threads is what causes the threads to terminate.
     * Then cnd_broadcast wakes them up and they will exit their function.
     */
    queue->num_threads = keep_num_threads;
    cnd_broadcast(&queue->has_queued_cond);
-   mtx_unlock(&queue->lock);
 
-   for (i = keep_num_threads; i < old_num_threads; i++)
-      thrd_join(queue->threads[i], NULL);
-
-   if (!finish_locked)
-      simple_mtx_unlock(&queue->finish_lock);
+   /* Wait for threads to terminate. */
+   if (keep_num_threads < old_num_threads) {
+      /* We need to unlock the mutex to allow threads to terminate. */
+      mtx_unlock(&queue->lock);
+      for (unsigned i = keep_num_threads; i < old_num_threads; i++)
+         thrd_join(queue->threads[i], NULL);
+      if (locked)
+         mtx_lock(&queue->lock);
+   } else {
+      if (!locked)
+         mtx_unlock(&queue->lock);
+   }
 }
 
 static void
@@ -538,25 +548,27 @@ util_queue_destroy(struct util_queue *queue)
 
    cnd_destroy(&queue->has_space_cond);
    cnd_destroy(&queue->has_queued_cond);
-   simple_mtx_destroy(&queue->finish_lock);
    mtx_destroy(&queue->lock);
    free(queue->jobs);
    free(queue->threads);
 }
 
-void
-util_queue_add_job(struct util_queue *queue,
-                   void *job,
-                   struct util_queue_fence *fence,
-                   util_queue_execute_func execute,
-                   util_queue_execute_func cleanup,
-                   const size_t job_size)
+static void
+util_queue_add_job_locked(struct util_queue *queue,
+                          void *job,
+                          struct util_queue_fence *fence,
+                          util_queue_execute_func execute,
+                          util_queue_execute_func cleanup,
+                          const size_t job_size,
+                          bool locked)
 {
    struct util_queue_job *ptr;
 
-   mtx_lock(&queue->lock);
+   if (!locked)
+      mtx_lock(&queue->lock);
    if (queue->num_threads == 0) {
-      mtx_unlock(&queue->lock);
+      if (!locked)
+         mtx_unlock(&queue->lock);
       /* well no good option here, but any leaks will be
        * short-lived as things are shutting down..
        */
@@ -573,7 +585,7 @@ util_queue_add_job(struct util_queue *queue,
        queue->flags & UTIL_QUEUE_INIT_SCALE_THREADS &&
        execute != util_queue_finish_execute &&
        queue->num_threads < queue->max_threads) {
-      util_queue_adjust_num_threads(queue, queue->num_threads + 1);
+      util_queue_adjust_num_threads(queue, queue->num_threads + 1, true);
    }
 
    if (queue->num_queued == queue->max_jobs) {
@@ -625,7 +637,20 @@ util_queue_add_job(struct util_queue *queue,
 
    queue->num_queued++;
    cnd_signal(&queue->has_queued_cond);
-   mtx_unlock(&queue->lock);
+   if (!locked)
+      mtx_unlock(&queue->lock);
+}
+
+void
+util_queue_add_job(struct util_queue *queue,
+                   void *job,
+                   struct util_queue_fence *fence,
+                   util_queue_execute_func execute,
+                   util_queue_execute_func cleanup,
+                   const size_t job_size)
+{
+   util_queue_add_job_locked(queue, job, fence, execute, cleanup, job_size,
+                             false);
 }
 
 /**
@@ -680,28 +705,38 @@ util_queue_finish(struct util_queue *queue)
     * a deadlock would happen, because 1 barrier requires that all threads
     * wait for it exclusively.
     */
-   simple_mtx_lock(&queue->finish_lock);
+   mtx_lock(&queue->lock);
 
    /* The number of threads can be changed to 0, e.g. by the atexit handler. */
    if (!queue->num_threads) {
-      simple_mtx_unlock(&queue->finish_lock);
+      mtx_unlock(&queue->lock);
       return;
    }
 
+   /* We need to disable adding new threads in util_queue_add_job because
+    * the finish operation requires a fixed number of threads.
+    *
+    * Also note that util_queue_add_job can unlock the mutex if there is not
+    * enough space in the queue and wait for space.
+    */
+   unsigned saved_flags = queue->flags;
+   queue->flags &= ~UTIL_QUEUE_INIT_SCALE_THREADS;
+
    fences = malloc(queue->num_threads * sizeof(*fences));
    util_barrier_init(&barrier, queue->num_threads);
 
    for (unsigned i = 0; i < queue->num_threads; ++i) {
       util_queue_fence_init(&fences[i]);
-      util_queue_add_job(queue, &barrier, &fences[i],
-                         util_queue_finish_execute, NULL, 0);
+      util_queue_add_job_locked(queue, &barrier, &fences[i],
+                                util_queue_finish_execute, NULL, 0, true);
    }
+   queue->flags = saved_flags;
+   mtx_unlock(&queue->lock);
 
    for (unsigned i = 0; i < queue->num_threads; ++i) {
       util_queue_fence_wait(&fences[i]);
       util_queue_fence_destroy(&fences[i]);
    }
-   simple_mtx_unlock(&queue->finish_lock);
 
    free(fences);
 }
diff --git a/src/util/u_queue.h b/src/util/u_queue.h
index b46b179b846..25f2c1589ed 100644
--- a/src/util/u_queue.h
+++ b/src/util/u_queue.h
@@ -205,7 +205,6 @@ struct util_queue_job {
 /* Put this into your context. */
 struct util_queue {
    char name[14]; /* 13 characters = the thread name without the index */
-   simple_mtx_t finish_lock; /* for util_queue_finish and protects 
threads/num_threads */
    mtx_t lock;
    cnd_t has_queued_cond;
    cnd_t has_space_cond;
@@ -249,7 +248,8 @@ void util_queue_finish(struct util_queue *queue);
  * and it can't be less than 1.
  */
 void
-util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads);
+util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads,
+                              bool locked);
 
 int64_t util_queue_get_thread_time_nano(struct util_queue *queue,
                                         unsigned thread_index);

Mesa (staging/23.2): util/u_queue: fix util_queue_finish deadlock by merging lock and finish_lock

Reply via email to