On 3/6/26 11:14, Sunil Khatri wrote:
> Clean up the amdgpu_userq_create function's failure handling by
> using the goto method. This avoids replicating the cleanup code
> for every failure condition.
>
> Signed-off-by: Sunil Khatri <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 45 ++++++++++-------------
> 1 file changed, 20 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index a614b01b7eab..aef9b5855812 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -758,7 +758,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> const struct amdgpu_userq_funcs *uq_funcs;
> struct amdgpu_usermode_queue *queue;
> struct amdgpu_db_info db_info;
> - bool skip_map_queue;
> + bool skip_map_queue = false, sem_held = false;
Please don't put lock status into a local variable; that is usually a really
bad idea.
> u32 qid;
> uint64_t index;
> int r = 0;
> @@ -818,17 +818,15 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> amdgpu_userq_input_va_validate(adev, queue, args->in.rptr_va,
> AMDGPU_GPU_PAGE_SIZE) ||
> amdgpu_userq_input_va_validate(adev, queue, args->in.wptr_va,
> AMDGPU_GPU_PAGE_SIZE)) {
> r = -EINVAL;
> - kfree(queue);
> - goto unlock;
> + goto free_queue;
> }
>
> /* Convert relative doorbell offset into absolute doorbell index */
> index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
> if (index == (uint64_t)-EINVAL) {
> drm_file_err(uq_mgr->file, "Failed to get doorbell for
> queue\n");
> - kfree(queue);
> r = -EINVAL;
> - goto unlock;
> + goto free_queue;
> }
>
> queue->doorbell_index = index;
> @@ -836,15 +834,13 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> r = amdgpu_userq_fence_driver_alloc(adev, queue);
> if (r) {
> drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
> - goto unlock;
> + goto free_queue;
> }
>
> r = uq_funcs->mqd_create(queue, &args->in);
> if (r) {
> drm_file_err(uq_mgr->file, "Failed to create Queue\n");
> - amdgpu_userq_fence_driver_free(queue);
> - kfree(queue);
> - goto unlock;
> + goto clean_fence_driver;
> }
>
> /* don't map the queue if scheduling is halted */
> @@ -852,16 +848,12 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
> (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
> skip_map_queue = true;
> - else
> - skip_map_queue = false;
> +
> if (!skip_map_queue) {
> r = amdgpu_userq_map_helper(queue);
> if (r) {
> drm_file_err(uq_mgr->file, "Failed to map Queue\n");
> - uq_funcs->mqd_destroy(queue);
> - amdgpu_userq_fence_driver_free(queue);
> - kfree(queue);
> - goto unlock;
> + goto clean_mqd;
> }
> }
>
> @@ -870,18 +862,15 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
>
> /* Wait for mode-1 reset to complete */
> down_read(&adev->reset_domain->sem);
> + sem_held = true;
> r = xa_alloc(&uq_mgr->userq_xa, &qid, queue,
> XA_LIMIT(1, AMDGPU_MAX_USERQ_COUNT), GFP_KERNEL);
> if (r) {
> if (!skip_map_queue)
> amdgpu_userq_unmap_helper(queue);
>
> - uq_funcs->mqd_destroy(queue);
> - amdgpu_userq_fence_driver_free(queue);
> - kfree(queue);
> r = -ENOMEM;
> - up_read(&adev->reset_domain->sem);
> - goto unlock;
> + goto clean_mqd;
> }
>
> r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue,
> GFP_KERNEL));
> @@ -890,11 +879,7 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
> if (!skip_map_queue)
> amdgpu_userq_unmap_helper(queue);
>
> - uq_funcs->mqd_destroy(queue);
> - amdgpu_userq_fence_driver_free(queue);
> - kfree(queue);
> - up_read(&adev->reset_domain->sem);
> - goto unlock;
> + goto clean_mqd;
> }
> up_read(&adev->reset_domain->sem);
>
> @@ -910,7 +895,17 @@ amdgpu_userq_create(struct drm_file *filp, union
> drm_amdgpu_userq *args)
>
> args->out.queue_id = qid;
> atomic_inc(&uq_mgr->userq_count[queue->queue_type]);
> + mutex_unlock(&uq_mgr->userq_mutex);
> + return 0;
>
> +clean_mqd:
> + uq_funcs->mqd_destroy(queue);
> + if (sem_held)
> + up_read(&adev->reset_domain->sem);
To be able to safely call uq_funcs->mqd_destroy() we must hold
adev->reset_domain->sem.
So if that is somehow called without holding the semaphore, we have a bug here.
Regards,
Christian.
> +clean_fence_driver:
> + amdgpu_userq_fence_driver_free(queue);
> +free_queue:
> + kfree(queue);
> unlock:
> mutex_unlock(&uq_mgr->userq_mutex);
>