The VM was not locked in the past since we initially only cleared the linked list element and not added it to any VM state.
But this has changed quite some time ago, we just never realized this problem because the VM state lock was masking it. Signed-off-by: Christian König <[email protected]> --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 19 +++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 ++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 768998c82b43..ec5130497743 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -878,6 +878,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo *bo[2] = {NULL, NULL}; struct amdgpu_bo_va *bo_va; bool same_hive = false; + struct drm_exec exec; int i, ret; if (!va) { @@ -958,19 +959,25 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, goto unwind; } - /* Add BO to VM internal data structures */ - ret = amdgpu_bo_reserve(bo[i], false); - if (ret) { - pr_debug("Unable to reserve BO during memory attach"); - goto unwind; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unwind; + ret = drm_exec_lock_obj(&exec, &bo[i]->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unwind; } + bo_va = amdgpu_vm_bo_find(vm, bo[i]); if (!bo_va) bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); else ++bo_va->ref_count; attachment[i]->bo_va = bo_va; - amdgpu_bo_unreserve(bo[i]); + drm_exec_fini(&exec); if (unlikely(!attachment[i]->bo_va)) { ret = -ENOMEM; pr_err("Failed to add BO object to VM. ret == %d\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5f9fa2140f09..5c90de58cc28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -232,6 +232,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; struct mm_struct *mm; + struct drm_exec exec; int r; mm = amdgpu_ttm_tt_get_usermm(abo->tbo.ttm); @@ -242,9 +243,18 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, !amdgpu_vm_is_bo_always_valid(vm, abo)) return -EPERM; - r = amdgpu_bo_reserve(abo, false); - if (r) - return r; + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_obj(&exec, &abo->tbo.base, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + } amdgpu_vm_bo_update_shared(abo); bo_va = amdgpu_vm_bo_find(vm, abo); @@ -260,8 +270,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, amdgpu_bo_unreserve(abo); return r; } - - amdgpu_bo_unreserve(abo); + drm_exec_fini(&exec); /* Validate and add eviction fence to DMABuf imports with dynamic * attachment in compute VMs. Re-validation will be done by @@ -294,7 +303,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, } } mutex_unlock(&vm->process_info->lock); + return r; +out_unlock: + drm_exec_fini(&exec); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1878e0faa722..f69332eed051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1445,6 +1445,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv; + struct drm_exec exec; int r, pasid; /* Ensure IB tests are run on ring */ @@ -1484,7 +1485,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (r) goto error_pasid; + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_vm; + } + fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); + drm_exec_fini(&exec); if (!fpriv->prt_va) { r = -ENOMEM; goto error_vm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6a2ea200d90c..b4bf1b7c214f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1735,6 +1735,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, { struct amdgpu_bo_va *bo_va; + amdgpu_vm_assert_locked(vm); + bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL); if (bo_va == NULL) { return NULL; -- 2.43.0
