On Mon, Feb 2, 2026 at 7:51 AM Christian König <[email protected]> wrote: > > The VM was not locked in the past since we initially only cleared the > linked list element and did not add it to any VM state. > > But this changed quite some time ago; we just never noticed the > problem because the VM state lock was masking it. > > Signed-off-by: Christian König <[email protected]>
Reviewed-by: Alex Deucher <[email protected]> > --- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 19 +++++++++++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 ++++++++++++++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 +++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ > 4 files changed, 42 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 768998c82b43..ec5130497743 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -878,6 +878,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, > struct kgd_mem *mem, > struct amdgpu_bo *bo[2] = {NULL, NULL}; > struct amdgpu_bo_va *bo_va; > bool same_hive = false; > + struct drm_exec exec; > int i, ret; > > if (!va) { > @@ -958,19 +959,25 @@ static int kfd_mem_attach(struct amdgpu_device *adev, > struct kgd_mem *mem, > goto unwind; > } > > - /* Add BO to VM internal data structures */ > - ret = amdgpu_bo_reserve(bo[i], false); > - if (ret) { > - pr_debug("Unable to reserve BO during memory attach"); > - goto unwind; > + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); > + drm_exec_until_all_locked(&exec) { > + ret = amdgpu_vm_lock_pd(vm, &exec, 0); > + drm_exec_retry_on_contention(&exec); > + if (unlikely(ret)) > + goto unwind; > + ret = drm_exec_lock_obj(&exec, &bo[i]->tbo.base); > + drm_exec_retry_on_contention(&exec); > + if (unlikely(ret)) > + goto unwind; > } > + > bo_va = amdgpu_vm_bo_find(vm, bo[i]); > if (!bo_va) > bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); > else > ++bo_va->ref_count; > attachment[i]->bo_va = bo_va; > - amdgpu_bo_unreserve(bo[i]); > + drm_exec_fini(&exec); > if (unlikely(!attachment[i]->bo_va)) { > ret = -ENOMEM; > pr_err("Failed to add BO object to VM. 
ret == %d\n", > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > index 5f9fa2140f09..5c90de58cc28 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > @@ -232,6 +232,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object > *obj, > struct amdgpu_vm *vm = &fpriv->vm; > struct amdgpu_bo_va *bo_va; > struct mm_struct *mm; > + struct drm_exec exec; > int r; > > mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm); > @@ -242,9 +243,18 @@ static int amdgpu_gem_object_open(struct drm_gem_object > *obj, > !amdgpu_vm_is_bo_always_valid(vm, abo)) > return -EPERM; > > - r = amdgpu_bo_reserve(abo, false); > - if (r) > - return r; > + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); > + drm_exec_until_all_locked(&exec) { > + r = drm_exec_prepare_obj(&exec, &abo->tbo.base, 1); > + drm_exec_retry_on_contention(&exec); > + if (unlikely(r)) > + goto out_unlock; > + > + r = amdgpu_vm_lock_pd(vm, &exec, 0); > + drm_exec_retry_on_contention(&exec); > + if (unlikely(r)) > + goto out_unlock; > + } > > amdgpu_vm_bo_update_shared(abo); > bo_va = amdgpu_vm_bo_find(vm, abo); > @@ -260,8 +270,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object > *obj, > amdgpu_bo_unreserve(abo); > return r; > } > - > - amdgpu_bo_unreserve(abo); > + drm_exec_fini(&exec); > > /* Validate and add eviction fence to DMABuf imports with dynamic > * attachment in compute VMs. 
Re-validation will be done by > @@ -294,7 +303,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object > *obj, > } > } > mutex_unlock(&vm->process_info->lock); > + return r; > > +out_unlock: > + drm_exec_fini(&exec); > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > index 1878e0faa722..f69332eed051 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > @@ -1445,6 +1445,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, > struct drm_file *file_priv) > { > struct amdgpu_device *adev = drm_to_adev(dev); > struct amdgpu_fpriv *fpriv; > + struct drm_exec exec; > int r, pasid; > > /* Ensure IB tests are run on ring */ > @@ -1484,7 +1485,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, > struct drm_file *file_priv) > if (r) > goto error_pasid; > > + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); > + drm_exec_until_all_locked(&exec) { > + r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0); > + drm_exec_retry_on_contention(&exec); > + if (unlikely(r)) > + goto error_vm; > + } > + > fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); > + drm_exec_fini(&exec); > if (!fpriv->prt_va) { > r = -ENOMEM; > goto error_vm; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 6a2ea200d90c..b4bf1b7c214f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -1735,6 +1735,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct > amdgpu_device *adev, > { > struct amdgpu_bo_va *bo_va; > > + amdgpu_vm_assert_locked(vm); > + > bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL); > if (bo_va == NULL) { > return NULL; > -- > 2.43.0 >
