On 5/9/25 12:10, Jesse.Zhang wrote:
> This patch resolves a kernel warning that occurs during user queue 
> initialization:
> 
> [  428.714241] WARNING: CPU: 23 PID: 1965 at drivers/gpu/drm/ttm/ttm_bo.c:823 
> ttm_bo_validate+0x15c/0x1a0 [ttm]
> [  428.714758] Call Trace:
> [  428.714758]  amdgpu_bo_pin+0x1b5/0x310 [amdgpu]
> [  428.714915]  amdgpu_userq_get_doorbell_index+0x71/0x270 [amdgpu]
> 
> The warning occurs due to improper buffer object state management when
> setting up doorbells for user queues. The key issues addressed are:
> 
> 1. Incorrect locking sequence - pinning before reservation
> 2. Inadequate error handling paths
> 3. Race conditions during BO validation
> 
> Changes made:
> 1. Reordered operations to reserve BO before pinning
> 2. Added proper cleanup path for reservation failures
> 3. Improved error handling with separate unpin/unreserve paths

Looks correct to me, but IIRC I've already seen the same patch from Arun.

It's just that Arun probably hasn't committed it yet because he's on vacation 
this week.

Regards,
Christian.

> 
> Signed-off-by: Jesse Zhang <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 33 +++++++++++++----------
>  1 file changed, 19 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 0f1cb6bc63db..444a0f5d98ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -283,28 +283,31 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr 
> *uq_mgr,
>       uint64_t index;
>       struct drm_gem_object *gobj;
>       struct amdgpu_userq_obj *db_obj = db_info->db_obj;
> +     struct amdgpu_bo *bo;
>       int r, db_size;
>  
>       gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
> -     if (gobj == NULL) {
> +     if (!gobj) {
>               drm_file_err(uq_mgr->file, "Can't find GEM object for 
> doorbell\n");
>               return -EINVAL;
>       }
>  
> -     db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
> +     bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
>       drm_gem_object_put(gobj);
> +     db_obj->obj = bo;
>  
> -     /* Pin the BO before generating the index, unpin in queue destroy */
> -     r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
> +     /* First reserve the BO to ensure proper state */
> +     r = amdgpu_bo_reserve(bo, true);
>       if (r) {
> -             drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin 
> doorbell object\n");
> +             drm_file_err(uq_mgr->file, "[Usermode queues] Failed to reserve 
> doorbell BO\n");
>               goto unref_bo;
>       }
>  
> -     r = amdgpu_bo_reserve(db_obj->obj, true);
> +     /* Validate and pin the BO */
> +     r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_DOORBELL);
>       if (r) {
> -             drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin 
> doorbell object\n");
> -             goto unpin_bo;
> +             drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin 
> doorbell BO\n");
> +             goto unreserve_bo;
>       }
>  
>       switch (db_info->queue_type) {
> @@ -325,24 +328,26 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr 
> *uq_mgr,
>               break;
>  
>       default:
> -             drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not 
> support\n",
> +             drm_file_err(uq_mgr->file, "IP %d not supported\n",
>                            db_info->queue_type);
>               r = -EINVAL;
>               goto unpin_bo;
>       }
>  
> -     index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
> +     index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, bo,
>                                            db_info->doorbell_offset, db_size);
>       drm_dbg_driver(adev_to_drm(uq_mgr->adev),
>                      "[Usermode queues] doorbell index=%lld\n", index);
> -     amdgpu_bo_unreserve(db_obj->obj);
> +
> +     amdgpu_bo_unreserve(bo);
>       return index;
>  
>  unpin_bo:
> -     amdgpu_bo_unpin(db_obj->obj);
> -
> +     amdgpu_bo_unpin(bo);
> +unreserve_bo:
> +     amdgpu_bo_unreserve(bo);
>  unref_bo:
> -     amdgpu_bo_unref(&db_obj->obj);
> +     amdgpu_bo_unref(&bo);
>       return r;
>  }
>  

Reply via email to