There is a possibility of deadlock when the last reference to a queue is
put in certain situations where the mutex is already held when calling
amdgpu_userq_destroy. So, based on the thread from which the mutex may
already be locked, we pass the lock state into the destroy path to
avoid taking the lock again.

Cc: Liang, Prike <[email protected]>
Suggested-by: Liang, Prike <[email protected]>
Signed-off-by: Sunil Khatri <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c     | 52 +++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h     |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   |  4 +-
 3 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index ced9ade44be4..9482664e9c2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -617,13 +617,17 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr 
*uq_mgr,
 }
 
 static int
-amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct 
amdgpu_usermode_queue *queue)
+amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct 
amdgpu_usermode_queue *queue,
+                    bool locked)
 {
        struct amdgpu_device *adev = uq_mgr->adev;
        int r = 0;
 
-       cancel_delayed_work_sync(&uq_mgr->resume_work);
+       /* It is safe to unlock since we are in destroy and this is the
+        * only remaining reference to the queue.
+        */
+       if (locked)
+               mutex_unlock(&uq_mgr->userq_mutex);
 
+       cancel_delayed_work_sync(&uq_mgr->resume_work);
        /* Cancel any pending hang detection work and cleanup */
        cancel_delayed_work_sync(&queue->hang_detect_work);
 
@@ -657,13 +661,27 @@ amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, 
struct amdgpu_usermode_que
                queue->state = AMDGPU_USERQ_STATE_HUNG;
        }
        amdgpu_userq_cleanup(queue);
-       mutex_unlock(&uq_mgr->userq_mutex);
+
+       if (!locked)
+               mutex_unlock(&uq_mgr->userq_mutex);
 
        pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
 
        return r;
 }
 
+static void amdgpu_userq_kref_destroy_locked(struct kref *kref)
+{
+       int r;
+       struct amdgpu_usermode_queue *queue =
+               container_of(kref, struct amdgpu_usermode_queue, refcount);
+       struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+
+       r = amdgpu_userq_destroy(uq_mgr, queue, true);
+       if (r)
+               drm_file_err(uq_mgr->file, "Failed to destroy usermode queue 
%d\n", r);
+}
+
 static void amdgpu_userq_kref_destroy(struct kref *kref)
 {
        int r;
@@ -671,7 +689,7 @@ static void amdgpu_userq_kref_destroy(struct kref *kref)
                container_of(kref, struct amdgpu_usermode_queue, refcount);
        struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
 
-       r = amdgpu_userq_destroy(uq_mgr, queue);
+       r = amdgpu_userq_destroy(uq_mgr, queue, false);
        if (r)
                drm_file_err(uq_mgr->file, "Failed to destroy usermode queue 
%d\n", r);
 }
@@ -689,10 +707,14 @@ struct amdgpu_usermode_queue *amdgpu_userq_get(struct 
amdgpu_userq_mgr *uq_mgr,
        return queue;
 }
 
-void amdgpu_userq_put(struct amdgpu_usermode_queue *queue)
+void amdgpu_userq_put(struct amdgpu_usermode_queue *queue, bool locked)
 {
-       if (queue)
-               kref_put(&queue->refcount, amdgpu_userq_kref_destroy);
+       if (queue) {
+               if (locked)
+                       kref_put(&queue->refcount, 
amdgpu_userq_kref_destroy_locked);
+               else
+                       kref_put(&queue->refcount, amdgpu_userq_kref_destroy);
+       }
 }
 
 static int amdgpu_userq_priority_permit(struct drm_file *filp,
@@ -978,7 +1000,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
                if (!queue)
                        return -ENOENT;
 
-               amdgpu_userq_put(queue);
+               amdgpu_userq_put(queue, false);
                break;
        }
 
@@ -1007,7 +1029,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
                        drm_file_err(uq_mgr->file,
                                     "trying restore queue without va 
mapping\n");
                        queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
-                       amdgpu_userq_put(queue);
+                       amdgpu_userq_put(queue, true);
                        continue;
                }
 
@@ -1015,7 +1037,7 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
                if (r)
                        ret = r;
 
-               amdgpu_userq_put(queue);
+               amdgpu_userq_put(queue, true);
        }
 
        if (ret)
@@ -1258,7 +1280,7 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
                r = amdgpu_userq_preempt_helper(queue);
                if (r)
                        ret = r;
-               amdgpu_userq_put(queue);
+               amdgpu_userq_put(queue, true);
        }
 
        if (ret)
@@ -1298,17 +1320,17 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr 
*uq_mgr)
                struct dma_fence *f = queue->last_fence;
 
                if (!f || dma_fence_is_signaled(f)) {
-                       amdgpu_userq_put(queue);
+                       amdgpu_userq_put(queue, true);
                        continue;
                }
                ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
                if (ret <= 0) {
                        drm_file_err(uq_mgr->file, "Timed out waiting for 
fence=%llu:%llu\n",
                                     f->context, f->seqno);
-                       amdgpu_userq_put(queue);
+                       amdgpu_userq_put(queue, true);
                        return -ETIMEDOUT;
                }
-               amdgpu_userq_put(queue);
+               amdgpu_userq_put(queue, true);
        }
 
        return 0;
@@ -1366,7 +1388,7 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr 
*userq_mgr)
                if (!queue)
                        break;
 
-               amdgpu_userq_put(queue);
+               amdgpu_userq_put(queue, false);
        }
 
        xa_destroy(&userq_mgr->userq_xa);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index f0abc16d02cc..2a496e74ec6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -116,7 +116,7 @@ struct amdgpu_db_info {
 };
 
 struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr 
*uq_mgr, u32 qid);
-void amdgpu_userq_put(struct amdgpu_usermode_queue *queue);
+void amdgpu_userq_put(struct amdgpu_usermode_queue *queue, bool locked);
 
 int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file 
*filp);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 18390d37a7e0..10e08cb6bd13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -612,7 +612,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void 
*data,
        kfree(syncobj_handles);
 
        if (queue)
-               amdgpu_userq_put(queue);
+               amdgpu_userq_put(queue, false);
 
        return r;
 }
@@ -914,7 +914,7 @@ amdgpu_userq_wait_return_fence_info(struct drm_file *filp,
                r = 0;
 
 put_waitq:
-       amdgpu_userq_put(waitq);
+       amdgpu_userq_put(waitq, false);
 
 free_fences:
        while (num_fences--)
-- 
2.34.1

Reply via email to