On 5/4/26 14:56, Sunil Khatri wrote:
> Use drm_exec to take both locks i.e vm root bo and
> wptr_obj bo to access the mapping data properly.
> 
> This fixes the security issue of unmap the wptr_obj while
> a queue creation is in progress and passing other
> bo at same address.
> 
> Signed-off-by: Sunil Khatri <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 122 ++++++++++-----------
>  1 file changed, 57 insertions(+), 65 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c 
> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index 501e2e10b4a6..3d4f83015488 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -30,34 +30,6 @@
>  #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>  #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>  
> -static int
> -mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
> -{
> -     int ret;
> -
> -     ret = amdgpu_bo_reserve(bo, true);
> -     if (ret) {
> -             DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
> -             goto err_reserve_bo_failed;
> -     }
> -
> -     ret = amdgpu_ttm_alloc_gart(&bo->tbo);
> -     if (ret) {
> -             DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
> -             goto err_map_bo_gart_failed;
> -     }
> -
> -     amdgpu_bo_unreserve(bo);
> -     bo = amdgpu_bo_ref(bo);
> -
> -     return 0;
> -
> -err_map_bo_gart_failed:
> -     amdgpu_bo_unreserve(bo);
> -err_reserve_bo_failed:
> -     return ret;
> -}
> -
>  static int
>  mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
>                             struct amdgpu_userq_mgr *uq_mgr,
> @@ -65,55 +37,75 @@ mes_userq_create_wptr_mapping(struct amdgpu_device *adev,
>                             uint64_t wptr)
>  {
>       struct amdgpu_bo_va_mapping *wptr_mapping;
> -     struct amdgpu_vm *wptr_vm;
>       struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
> +     struct amdgpu_bo *obj;
> +     struct amdgpu_vm *vm = queue->vm;
> +     struct drm_exec exec;
>       int ret;
>  
> -     wptr_vm = queue->vm;
> -     ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
> -     if (ret)
> -             return ret;
> -
>       wptr &= AMDGPU_GMC_HOLE_MASK;
> -     wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
> -     amdgpu_bo_unreserve(wptr_vm->root.bo);
> -     if (!wptr_mapping) {
> -             DRM_ERROR("Failed to lookup wptr bo\n");
> -             return -EINVAL;
> -     }
>  
> -     wptr_obj->obj = wptr_mapping->bo_va->base.bo;
> -     if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
> -             DRM_ERROR("Requested GART mapping for wptr bo larger than one 
> page\n");
> -             return -EINVAL;
> -     }
> +     drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);

This should probably be 2 instead of 0.

> +     drm_exec_until_all_locked(&exec) {
> +             ret = amdgpu_vm_lock_pd(vm, &exec, 1);
> +             drm_exec_retry_on_contention(&exec);
> +             if (unlikely(ret))
> +                     goto fail_lock;
> +
> +             wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> 
> PAGE_SHIFT);
> +             if (!wptr_mapping) {
> +                     DRM_ERROR("Failed to lock up wptr bo\n");

Please drop that error message. It can spam the logs when userspace 
intentionally gives incorrect values.

> +                     ret = -EINVAL;
> +                     goto fail_lock;
> +             }
>  
> -     ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
> -     if (ret) {
> -             DRM_ERROR("Failed to map wptr bo to GART\n");
> -             return ret;
> -     }
> +             obj = wptr_mapping->bo_va->base.bo;
> +             ret = drm_exec_prepare_obj(&exec, &obj->tbo.base, 1);

Using drm_exec_lock_obj() should be sufficient.

We don't need a fence slot for this use case.

> +             drm_exec_retry_on_contention(&exec);
> +             if (unlikely(ret)) {
> +                     DRM_ERROR("Failed to prepare wptr bo\n");

Same here, that this can fail is normal handling. The worst case is OOM and 
that is already printed in the logs.

> +                     goto fail_lock;
> +             }

We got all the locks now, so the drm_exec_until_all_locked() loop can be closed 
here.

>  
> -     ret = amdgpu_bo_reserve(wptr_obj->obj, true);
> -     if (ret) {
> -             DRM_ERROR("Failed to reserve wptr bo\n");
> -             return ret;
> -     }


> +             /* mapping now should be stable since both the locks are held */
> +             wptr_mapping = amdgpu_vm_bo_lookup_mapping(vm, wptr >> 
> PAGE_SHIFT);
> +             if (!wptr_mapping) {
> +                     DRM_ERROR("Failed to lock up wptr bo\n");
> +                     ret = -EINVAL;
> +                     goto fail_lock;
> +             }

Doing that again is unecessary. We are holding the VM lock above while doing 
the lockup.

>  
> -     /* TODO use eviction fence instead of pinning. */
> -     ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);
> -     if (ret) {
> -             drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin 
> wptr bo\n");
> -             goto unresv_bo;
> -     }
> +             wptr_obj->obj = amdgpu_bo_ref(wptr_mapping->bo_va->base.bo);

That is now unecessary as well.

> +
> +             if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
> +                     DRM_ERROR("Requested wptr bo size is larger than one 
> page\n");

Same, please drop that error message.

> +                     ret = -EINVAL;
> +                     goto fail_map;
> +             }
> +
> +             ret = amdgpu_ttm_alloc_gart(&wptr_obj->obj->tbo);
> +             if (ret) {
> +                     DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);

That error message is useful.

> +                     goto fail_map;
> +             }
> +
> +             /* TODO use eviction fence instead of pinning. */
> +             ret = amdgpu_bo_pin(wptr_obj->obj, AMDGPU_GEM_DOMAIN_GTT);

Oh! The piun needs to come before the alloc_gart! That's wrong in the existing 
code as well.

> +             if (ret) {
> +                     DRM_ERROR("Failed to pin wptr bo. ret %d\n", ret);

That error message makes sense.

Regards,
Christian.

> +                     goto fail_map;
> +             }
>  
> -     queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
> -     amdgpu_bo_unreserve(wptr_obj->obj);
> +             queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset(wptr_obj->obj);
> +     }
>  
> +     drm_exec_fini(&exec);
>       return 0;
>  
> -unresv_bo:
> -     amdgpu_bo_unreserve(wptr_obj->obj);
> +fail_map:
> +     amdgpu_bo_unref(&wptr_obj->obj);
> +fail_lock:
> +     drm_exec_fini(&exec);
>       return ret;
>  
>  }

Reply via email to