The current user queue implementation uses a dual-mutex scheme with
both per-device (adev->userq_mutex) and per-process (uq_mgr->userq_mutex)
locking. This overcomplicated design creates potential deadlock scenarios
and makes the code harder to maintain.

Simplify the locking by switching entirely to the global adev->userq_mutex
for all user queue operations; a short sketch of the resulting pattern
follows the list below. This approach:
- Eliminates potential deadlocks between the two mutexes
- Serializes all user queue handling at the kernel level
- Provides adequate protection since user queue operations aren't
  performance-critical paths
- Makes the code more maintainable with a single locking scheme
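
As a concrete illustration of the single-lock scheme, here is a minimal,
runnable userspace sketch. It is only an analogy: a pthread mutex stands
in for adev->userq_mutex, and the struct and function names are
simplified placeholders, not the driver's real API.

#include <pthread.h>
#include <stdio.h>

/* One device-wide lock, the analogue of adev->userq_mutex. */
static pthread_mutex_t dev_userq_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Per-process manager; note there is no per-manager mutex any more. */
struct userq_mgr {
	int queue_count;
};

static void userq_create(struct userq_mgr *mgr)
{
	pthread_mutex_lock(&dev_userq_mutex);	/* was mgr->userq_mutex */
	mgr->queue_count++;
	pthread_mutex_unlock(&dev_userq_mutex);
}

static void userq_destroy(struct userq_mgr *mgr)
{
	pthread_mutex_lock(&dev_userq_mutex);
	mgr->queue_count--;
	pthread_mutex_unlock(&dev_userq_mutex);
}

int main(void)
{
	struct userq_mgr proc_a = { 0 }, proc_b = { 0 };

	/* Operations on different per-process managers still serialize on
	 * the single device lock, so no cross-mutex ordering can arise. */
	userq_create(&proc_a);
	userq_create(&proc_b);
	userq_destroy(&proc_a);
	printf("a=%d b=%d\n", proc_a.queue_count, proc_b.queue_count);
	return 0;
}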

Key changes:
1. Remove uq_mgr->userq_mutex entirely from amdgpu_userq_mgr
2. Consistently use adev->userq_mutex across all user queue operations
3. Add proper work flushing in amdgpu_userq_ensure_ev_fence() to
   prevent races when ensuring eviction fence availability (see the
   sketch after this list)
4. Maintain proper locking order throughout all user queue functions
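
For change 3, the flow is: flush the pending resume work with the device
lock dropped, then take adev->userq_mutex and re-check the eviction
fence, retrying if it is missing or already signaled. A small userspace
sketch of that flush/check/retry pattern follows; it is a simplified
model under stated assumptions (pthreads instead of kernel mutexes,
booleans instead of a real delayed work item and dma_fence), not the
driver code itself.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t dev_userq_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool ev_fence_valid;		/* stands in for a live eviction fence */
static bool resume_work_pending;	/* stands in for queued resume work */

/* Models re-arming the resume work that recreates the fence. */
static void schedule_resume_work(void)
{
	resume_work_pending = true;
}

/* Models flush_delayed_work(): run any queued resume work to completion. */
static void flush_resume_work(void)
{
	if (resume_work_pending) {
		ev_fence_valid = true;
		resume_work_pending = false;
	}
}

/* Returns with dev_userq_mutex held and a valid fence, mirroring how the
 * real amdgpu_userq_ensure_ev_fence() returns with the lock held. */
static void ensure_ev_fence(void)
{
	for (;;) {
		/* Flush with the lock dropped: the worker also takes the
		 * device lock, so flushing while holding it could deadlock. */
		flush_resume_work();

		pthread_mutex_lock(&dev_userq_mutex);
		if (ev_fence_valid)
			return;

		/* Fence missing or already signaled: drop the lock,
		 * re-arm the work and retry. */
		pthread_mutex_unlock(&dev_userq_mutex);
		schedule_resume_work();
	}
}

int main(void)
{
	ensure_ev_fence();
	printf("eviction fence ready; device lock held\n");
	pthread_mutex_unlock(&dev_userq_mutex);
	return 0;
}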

The serialization of all user queue operations is acceptable since
these are management operations that don't need high parallelism.

Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
Signed-off-by: Jesse Zhang <jesse.zh...@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_eviction_fence.c    |  7 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     | 32 ++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h     |  1 -
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   |  5 +--
 4 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
index 23d7d0b0d625..d6a07fac7df2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -105,9 +105,10 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
        struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
        struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
        struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+       struct amdgpu_device *adev = uq_mgr->adev;
        struct amdgpu_eviction_fence *ev_fence;
 
-       mutex_lock(&uq_mgr->userq_mutex);
+       mutex_lock(&adev->userq_mutex);
        spin_lock(&evf_mgr->ev_fence_lock);
        ev_fence = evf_mgr->ev_fence;
        if (ev_fence)
@@ -118,13 +119,13 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
 
        amdgpu_userq_evict(uq_mgr, ev_fence);
 
-       mutex_unlock(&uq_mgr->userq_mutex);
+       mutex_unlock(&adev->userq_mutex);
        dma_fence_put(&ev_fence->base);
        return;
 
 unlock:
        spin_unlock(&evf_mgr->ev_fence_lock);
-       mutex_unlock(&uq_mgr->userq_mutex);
+       mutex_unlock(&adev->userq_mutex);
 }
 
 static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index b649ac0cedff..af92450ea6eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -198,17 +198,18 @@ amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
                             struct amdgpu_eviction_fence_mgr *evf_mgr)
 {
        struct amdgpu_eviction_fence *ev_fence;
+       struct amdgpu_device *adev = uq_mgr->adev;
 
 retry:
        /* Flush any pending resume work to create ev_fence */
        flush_delayed_work(&uq_mgr->resume_work);
 
-       mutex_lock(&uq_mgr->userq_mutex);
+       mutex_lock(&adev->userq_mutex);
        spin_lock(&evf_mgr->ev_fence_lock);
        ev_fence = evf_mgr->ev_fence;
        spin_unlock(&evf_mgr->ev_fence_lock);
        if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
-               mutex_unlock(&uq_mgr->userq_mutex);
+               mutex_unlock(&adev->userq_mutex);
                /*
                 * Looks like there was no pending resume work,
                 * add one now to create a valid eviction fence
@@ -362,12 +363,12 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
        int r = 0;
 
        cancel_delayed_work_sync(&uq_mgr->resume_work);
-       mutex_lock(&uq_mgr->userq_mutex);
+       mutex_lock(&adev->userq_mutex);
 
        queue = amdgpu_userq_find(uq_mgr, queue_id);
        if (!queue) {
                drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
-               mutex_unlock(&uq_mgr->userq_mutex);
+               mutex_unlock(&adev->userq_mutex);
                return -EINVAL;
        }
        amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
@@ -388,7 +389,7 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
                queue->state = AMDGPU_USERQ_STATE_HUNG;
        }
        amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
-       mutex_unlock(&uq_mgr->userq_mutex);
+       mutex_unlock(&adev->userq_mutex);
 
        pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
        pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
@@ -478,7 +479,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
                pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
                return r;
        }
-
        /*
         * There could be a situation that we are creating a new queue while
         * the other queues under this UQ_mgr are suspended. So if there is any
@@ -486,7 +486,6 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
         *
         * This will also make sure we have a valid eviction fence ready to be used.
         */
-       mutex_lock(&adev->userq_mutex);
        amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
 
        uq_funcs = adev->userq_funcs[args->in.ip_type];
@@ -588,9 +587,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
        kfree(queue_name);
 
        args->out.queue_id = qid;
-
 unlock:
-       mutex_unlock(&uq_mgr->userq_mutex);
        mutex_unlock(&adev->userq_mutex);
 
        return r;
@@ -820,11 +817,12 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
 {
        struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
        struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+       struct amdgpu_device *adev = uq_mgr->adev;
        int ret;
 
        flush_delayed_work(&fpriv->evf_mgr.suspend_work);
 
-       mutex_lock(&uq_mgr->userq_mutex);
+       mutex_lock(&adev->userq_mutex);
 
        ret = amdgpu_userq_validate_bos(uq_mgr);
        if (ret) {
@@ -839,7 +837,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
        }
 
 unlock:
-       mutex_unlock(&uq_mgr->userq_mutex);
+       mutex_unlock(&adev->userq_mutex);
 }
 
 static int
@@ -919,7 +917,6 @@ amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
                          struct amdgpu_device *adev)
 {
-       mutex_init(&userq_mgr->userq_mutex);
        idr_init_base(&userq_mgr->userq_idr, 1);
        userq_mgr->adev = adev;
        userq_mgr->file = file_priv;
@@ -942,7 +939,6 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
        cancel_delayed_work_sync(&userq_mgr->resume_work);
 
        mutex_lock(&adev->userq_mutex);
-       mutex_lock(&userq_mgr->userq_mutex);
        idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
                amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
                amdgpu_userq_unmap_helper(userq_mgr, queue);
@@ -956,9 +952,7 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
                }
        }
        idr_destroy(&userq_mgr->userq_idr);
-       mutex_unlock(&userq_mgr->userq_mutex);
        mutex_unlock(&adev->userq_mutex);
-       mutex_destroy(&userq_mgr->userq_mutex);
 }
 
 int amdgpu_userq_suspend(struct amdgpu_device *adev)
@@ -975,13 +969,11 @@ int amdgpu_userq_suspend(struct amdgpu_device *adev)
        mutex_lock(&adev->userq_mutex);
        list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
                cancel_delayed_work_sync(&uqm->resume_work);
-               mutex_lock(&uqm->userq_mutex);
                idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
                        r = amdgpu_userq_unmap_helper(uqm, queue);
                        if (r)
                                ret = r;
                }
-               mutex_unlock(&uqm->userq_mutex);
        }
        mutex_unlock(&adev->userq_mutex);
        return ret;
@@ -1000,13 +992,11 @@ int amdgpu_userq_resume(struct amdgpu_device *adev)
 
        mutex_lock(&adev->userq_mutex);
        list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
-               mutex_lock(&uqm->userq_mutex);
                idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
                        r = amdgpu_userq_map_helper(uqm, queue);
                        if (r)
                                ret = r;
                }
-               mutex_unlock(&uqm->userq_mutex);
        }
        mutex_unlock(&adev->userq_mutex);
        return ret;
@@ -1031,7 +1021,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
        adev->userq_halt_for_enforce_isolation = true;
        list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
                cancel_delayed_work_sync(&uqm->resume_work);
-               mutex_lock(&uqm->userq_mutex);
                idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
                        if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
                             (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
@@ -1041,7 +1030,6 @@ int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
                                        ret = r;
                        }
                }
-               mutex_unlock(&uqm->userq_mutex);
        }
        mutex_unlock(&adev->userq_mutex);
        return ret;
@@ -1065,7 +1053,6 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
                dev_warn(adev->dev, "userq scheduling already started!\n");
        adev->userq_halt_for_enforce_isolation = false;
        list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
-               mutex_lock(&uqm->userq_mutex);
                idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
                        if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
                             (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
@@ -1075,7 +1062,6 @@ int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
                                        ret = r;
                        }
                }
-               mutex_unlock(&uqm->userq_mutex);
        }
        mutex_unlock(&adev->userq_mutex);
        return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index c027dd916672..2d63308d55c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -89,7 +89,6 @@ struct amdgpu_userq_funcs {
 /* Usermode queues for gfx */
 struct amdgpu_userq_mgr {
        struct idr                      userq_idr;
-       struct mutex                    userq_mutex;
        struct amdgpu_device            *adev;
        struct delayed_work             resume_work;
        struct list_head                list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 95e91d1dc58a..daf3be92a39c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -464,6 +464,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
        u32 *bo_handles_write, num_write_bo_handles;
        u32 *syncobj_handles, num_syncobj_handles;
        u32 *bo_handles_read, num_read_bo_handles;
+       struct amdgpu_device *adev = userq_mgr->adev;
        int r, i, entry, rentry, wentry;
        struct dma_fence *fence;
        struct drm_exec exec;
@@ -557,14 +558,14 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
        /* Create a new fence */
        r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
        if (r) {
-               mutex_unlock(&userq_mgr->userq_mutex);
+               mutex_unlock(&adev->userq_mutex);
                kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
                goto put_gobj_write;
        }
 
        dma_fence_put(queue->last_fence);
        queue->last_fence = dma_fence_get(fence);
-       mutex_unlock(&userq_mgr->userq_mutex);
+       mutex_unlock(&adev->userq_mutex);
 
        drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
                      (num_read_bo_handles + num_write_bo_handles));
-- 
2.49.0