On 5/9/2025 4:51 PM, Christian König wrote:
On 5/9/25 12:10, Jesse.Zhang wrote:
This patch resolves a kernel warning that occurs during user queue 
initialization:

[  428.714241] WARNING: CPU: 23 PID: 1965 at drivers/gpu/drm/ttm/ttm_bo.c:823 
ttm_bo_validate+0x15c/0x1a0 [ttm]
[  428.714758] Call Trace:
[  428.714758]  amdgpu_bo_pin+0x1b5/0x310 [amdgpu]
[  428.714915]  amdgpu_userq_get_doorbell_index+0x71/0x270 [amdgpu]

The warning occurs due to improper buffer object state management when
setting up doorbells for user queues. The key issues addressed are:

1. Incorrect locking sequence - pinning before reservation
2. Inadequate error handling paths
3. Race conditions during BO validation

Changes made:
1. Reordered operations to reserve BO before pinning
2. Added proper cleanup path for reservation failures
3. Improved error handling with separate unpin/unreserve paths
Looks correct to me, but IIRC I've already seen the same patch from Arun.

It's just that Arun probably hasn't committed it yet because he's on vacation 
this week.
I committed my patch into amd-staging-drm-next.

Thanks,
Arun.

Regards,
Christian.

Signed-off-by: Jesse Zhang <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 33 +++++++++++++----------
  1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 0f1cb6bc63db..444a0f5d98ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -283,28 +283,31 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr 
*uq_mgr,
        uint64_t index;
        struct drm_gem_object *gobj;
        struct amdgpu_userq_obj *db_obj = db_info->db_obj;
+       struct amdgpu_bo *bo;
        int r, db_size;
gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
-       if (gobj == NULL) {
+       if (!gobj) {
                drm_file_err(uq_mgr->file, "Can't find GEM object for 
doorbell\n");
                return -EINVAL;
        }
- db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+       bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
        drm_gem_object_put(gobj);
+       db_obj->obj = bo;
- /* Pin the BO before generating the index, unpin in queue destroy */
-       r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
+       /* First reserve the BO to ensure proper state */
+       r = amdgpu_bo_reserve(bo, true);
        if (r) {
-               drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell 
object\n");
+               drm_file_err(uq_mgr->file, "[Usermode queues] Failed to reserve 
doorbell BO\n");
                goto unref_bo;
        }
- r = amdgpu_bo_reserve(db_obj->obj, true);
+       /* Validate and pin the BO */
+       r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_DOORBELL);
        if (r) {
-               drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell 
object\n");
-               goto unpin_bo;
+               drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell 
BO\n");
+               goto unreserve_bo;
        }
switch (db_info->queue_type) {
@@ -325,24 +328,26 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr 
*uq_mgr,
                break;
default:
-               drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not 
support\n",
+               drm_file_err(uq_mgr->file, "IP %d not supported\n",
                             db_info->queue_type);
                r = -EINVAL;
                goto unpin_bo;
        }
- index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
+       index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, bo,
                                             db_info->doorbell_offset, db_size);
        drm_dbg_driver(adev_to_drm(uq_mgr->adev),
                       "[Usermode queues] doorbell index=%lld\n", index);
-       amdgpu_bo_unreserve(db_obj->obj);
+
+       amdgpu_bo_unreserve(bo);
        return index;
unpin_bo:
-       amdgpu_bo_unpin(db_obj->obj);
-
+       amdgpu_bo_unpin(bo);
+unreserve_bo:
+       amdgpu_bo_unreserve(bo);
  unref_bo:
-       amdgpu_bo_unref(&db_obj->obj);
+       amdgpu_bo_unref(&bo);
        return r;
  }

Reply via email to