That was actually complete nonsense and didn't validate the BOs at all.
The code just cleared all VM areas where it couldn't grab the lock for
a BO.

Try to fix this by walking the invalidated list under the status lock,
locking and validating each BO through drm_exec and updating its page
table entries, while amdgpu_vm_handle_moved() takes care of the per-VM
BOs. Only compile tested at the moment.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 97 +++++++++--------------
 1 file changed, 37 insertions(+), 60 deletions(-)
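
Note (not part of the patch): below is a minimal, illustrative sketch of
the drm_exec locking pattern the new code relies on. lock_and_validate()
is a made-up helper and the validation/page-table step is only hinted at
in a comment; the point is that everything inside
drm_exec_until_all_locked() must be safe to restart whenever
drm_exec_retry_on_contention() detects contention.

#include <drm/drm_exec.h>
#include <drm/drm_gem.h>

/* Made-up helper, only to illustrate the retry-safe locking loop. */
static int lock_and_validate(struct drm_gem_object *obj)
{
        struct drm_exec exec;
        int ret = 0;

        drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
        drm_exec_until_all_locked(&exec) {
                ret = drm_exec_lock_obj(&exec, obj);
                drm_exec_retry_on_contention(&exec);
                if (ret)
                        goto out;

                /*
                 * Validation and page table updates would go here; if
                 * they can hit a contended reservation they need their
                 * own drm_exec_retry_on_contention(), as in the patch.
                 */
        }
out:
        drm_exec_fini(&exec);
        return ret;
}

In the patch itself this pattern wraps both the queue BO locking and the
call to amdgpu_userq_validate_bos() inside amdgpu_userq_update_vm().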

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 424831997cb1..3b1aae26a6c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -546,32 +546,52 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
 }
 
 static int
-amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
+amdgpu_userq_validate_bos(struct amdgpu_device *adev, struct drm_exec *exec,
+                         struct amdgpu_vm *vm)
 {
        struct ttm_operation_ctx ctx = { false, false };
+       struct amdgpu_bo_va *bo_va;
+       struct amdgpu_bo *bo;
        int ret;
 
-       amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+       spin_lock(&vm->status_lock);
+       while (!list_empty(&vm->invalidated)) {
+               bo_va = list_first_entry(&vm->invalidated,
+                                        struct amdgpu_bo_va,
+                                        base.vm_status);
+               spin_unlock(&vm->status_lock);
+
+               bo = bo_va->base.bo;
+               ret = drm_exec_lock_obj(exec, &bo->tbo.base);
+               if (unlikely(ret))
+                       return ret;
 
-       ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-       if (ret)
-               DRM_ERROR("Fail to validate\n");
+               amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+               ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+               if (ret)
+                       return ret;
 
-       return ret;
+               /* This moves the bo_va to the done list */
+               ret = amdgpu_vm_bo_update(adev, bo_va, false);
+               if (ret)
+                       return ret;
+
+               spin_lock(&vm->status_lock);
+       }
+       spin_unlock(&vm->status_lock);
+
+       return 0;
 }
 
 static int
-amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr)
+amdgpu_userq_update_vm(struct amdgpu_userq_mgr *uq_mgr)
 {
        struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
-       struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_device *adev = uq_mgr->adev;
+       struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_bo_va *bo_va;
-       struct ww_acquire_ctx *ticket;
        struct drm_exec exec;
        struct amdgpu_bo *bo;
-       struct dma_resv *resv;
-       bool clear, unlock;
        int ret = 0;
 
        drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
@@ -594,59 +614,16 @@ amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr)
                        if (unlikely(ret))
                                goto unlock_all;
                }
-       }
-
-       spin_lock(&vm->status_lock);
-       while (!list_empty(&vm->moved)) {
-               bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
-                                        base.vm_status);
-               spin_unlock(&vm->status_lock);
 
-               /* Per VM BOs never need to bo cleared in the page tables */
-               ret = amdgpu_vm_bo_update(adev, bo_va, false);
+               ret = amdgpu_userq_validate_bos(adev, &exec, vm);
+               drm_exec_retry_on_contention(&exec);
                if (ret)
                        goto unlock_all;
-               spin_lock(&vm->status_lock);
        }
 
-       ticket = &exec.ticket;
-       while (!list_empty(&vm->invalidated)) {
-               bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
-                                        base.vm_status);
-               resv = bo_va->base.bo->tbo.base.resv;
-               spin_unlock(&vm->status_lock);
-
-               bo = bo_va->base.bo;
-               ret = amdgpu_userq_validate_vm_bo(NULL, bo);
-               if (ret) {
-                       drm_file_err(uq_mgr->file, "Failed to validate BO\n");
-                       goto unlock_all;
-               }
-
-               /* Try to reserve the BO to avoid clearing its ptes */
-               if (!adev->debug_vm && dma_resv_trylock(resv)) {
-                       clear = false;
-                       unlock = true;
-               /* The caller is already holding the reservation lock */
-               } else if (dma_resv_locking_ctx(resv) == ticket) {
-                       clear = false;
-                       unlock = false;
-               /* Somebody else is using the BO right now */
-               } else {
-                       clear = true;
-                       unlock = false;
-               }
-
-               ret = amdgpu_vm_bo_update(adev, bo_va, clear);
-
-               if (unlock)
-                       dma_resv_unlock(resv);
-               if (ret)
-                       goto unlock_all;
-
-               spin_lock(&vm->status_lock);
-       }
-       spin_unlock(&vm->status_lock);
+       ret = amdgpu_vm_handle_moved(adev, vm, NULL);
+       if (ret)
+               goto unlock_all;
 
        ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
        if (ret)
@@ -667,7 +644,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work)
 
        mutex_lock(&uq_mgr->userq_mutex);
 
-       ret = amdgpu_userq_validate_bos(uq_mgr);
+       ret = amdgpu_userq_update_vm(uq_mgr);
        if (ret) {
                drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
                goto unlock;
-- 
2.43.0
