On Mon, Feb 2, 2026 at 7:51 AM Christian König
<[email protected]> wrote:
>
> The VM was not locked in the past since we initially only cleared the
> linked list element and did not add it to any VM state.
>
> But this changed quite some time ago; we just never noticed the
> problem because the VM state lock was masking it.
>
> Signed-off-by: Christian König <[email protected]>

Reviewed-by: Alex Deucher <[email protected]>

> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 19 +++++++++++-----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       | 22 ++++++++++++++-----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       | 10 +++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  2 ++
>  4 files changed, 42 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 768998c82b43..ec5130497743 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -878,6 +878,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
> struct kgd_mem *mem,
>         struct amdgpu_bo *bo[2] = {NULL, NULL};
>         struct amdgpu_bo_va *bo_va;
>         bool same_hive = false;
> +       struct drm_exec exec;
>         int i, ret;
>
>         if (!va) {
> @@ -958,19 +959,25 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
> struct kgd_mem *mem,
>                         goto unwind;
>                 }
>
> -               /* Add BO to VM internal data structures */
> -               ret = amdgpu_bo_reserve(bo[i], false);
> -               if (ret) {
> -                       pr_debug("Unable to reserve BO during memory attach");
> -                       goto unwind;
> +               drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> +               drm_exec_until_all_locked(&exec) {
> +                       ret = amdgpu_vm_lock_pd(vm, &exec, 0);
> +                       drm_exec_retry_on_contention(&exec);
> +                       if (unlikely(ret))
> +                               goto unwind;
> +                       ret = drm_exec_lock_obj(&exec, &bo[i]->tbo.base);
> +                       drm_exec_retry_on_contention(&exec);
> +                       if (unlikely(ret))
> +                               goto unwind;
>                 }
> +
>                 bo_va = amdgpu_vm_bo_find(vm, bo[i]);
>                 if (!bo_va)
>                         bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
>                 else
>                         ++bo_va->ref_count;
>                 attachment[i]->bo_va = bo_va;
> -               amdgpu_bo_unreserve(bo[i]);
> +               drm_exec_fini(&exec);
>                 if (unlikely(!attachment[i]->bo_va)) {
>                         ret = -ENOMEM;
>                         pr_err("Failed to add BO object to VM. ret == %d\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 5f9fa2140f09..5c90de58cc28 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -232,6 +232,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object 
> *obj,
>         struct amdgpu_vm *vm = &fpriv->vm;
>         struct amdgpu_bo_va *bo_va;
>         struct mm_struct *mm;
> +       struct drm_exec exec;
>         int r;
>
>         mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm);
> @@ -242,9 +243,18 @@ static int amdgpu_gem_object_open(struct drm_gem_object 
> *obj,
>             !amdgpu_vm_is_bo_always_valid(vm, abo))
>                 return -EPERM;
>
> -       r = amdgpu_bo_reserve(abo, false);
> -       if (r)
> -               return r;
> +       drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
> +       drm_exec_until_all_locked(&exec) {
> +               r = drm_exec_prepare_obj(&exec, &abo->tbo.base, 1);
> +               drm_exec_retry_on_contention(&exec);
> +               if (unlikely(r))
> +                       goto out_unlock;
> +
> +               r = amdgpu_vm_lock_pd(vm, &exec, 0);
> +               drm_exec_retry_on_contention(&exec);
> +               if (unlikely(r))
> +                       goto out_unlock;
> +       }
>
>         amdgpu_vm_bo_update_shared(abo);
>         bo_va = amdgpu_vm_bo_find(vm, abo);
> @@ -260,8 +270,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object 
> *obj,
>                 amdgpu_bo_unreserve(abo);
>                 return r;
>         }
> -
> -       amdgpu_bo_unreserve(abo);
> +       drm_exec_fini(&exec);
>
>         /* Validate and add eviction fence to DMABuf imports with dynamic
>          * attachment in compute VMs. Re-validation will be done by
> @@ -294,7 +303,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object 
> *obj,
>                 }
>         }
>         mutex_unlock(&vm->process_info->lock);
> +       return r;
>
> +out_unlock:
> +       drm_exec_fini(&exec);
>         return r;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 1878e0faa722..f69332eed051 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1445,6 +1445,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
> struct drm_file *file_priv)
>  {
>         struct amdgpu_device *adev = drm_to_adev(dev);
>         struct amdgpu_fpriv *fpriv;
> +       struct drm_exec exec;
>         int r, pasid;
>
>         /* Ensure IB tests are run on ring */
> @@ -1484,7 +1485,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, 
> struct drm_file *file_priv)
>         if (r)
>                 goto error_pasid;
>
> +       drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
> +       drm_exec_until_all_locked(&exec) {
> +               r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0);
> +               drm_exec_retry_on_contention(&exec);
> +               if (unlikely(r))
> +                       goto error_vm;
> +       }
> +
>         fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
> +       drm_exec_fini(&exec);
>         if (!fpriv->prt_va) {
>                 r = -ENOMEM;
>                 goto error_vm;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 6a2ea200d90c..b4bf1b7c214f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1735,6 +1735,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct 
> amdgpu_device *adev,
>  {
>         struct amdgpu_bo_va *bo_va;
>
> +       amdgpu_vm_assert_locked(vm);
> +
>         bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
>         if (bo_va == NULL) {
>                 return NULL;
> --
> 2.43.0
>

Reply via email to