On 2019-12-05 8:39 a.m., Christian König wrote:
This allows to invalidate VM entries without taking the reservation lock.

Signed-off-by: Christian König <christian.koe...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 +++++++++++++++++++++-----
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +++
  2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0d700e8154c4..839d6df394fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -656,7 +656,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
                              void *param)
  {
        struct amdgpu_vm_bo_base *bo_base, *tmp;
-       int r = 0;
+       int r;
vm->bulk_moveable &= list_empty(&vm->evicted); @@ -665,7 +665,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = validate(param, bo);
                if (r)
-                       break;
+                       return r;
if (bo->tbo.type != ttm_bo_type_kernel) {
                        amdgpu_vm_bo_moved(bo_base);
@@ -678,7 +678,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
                }
        }
- return r;
+       mutex_lock(&vm->eviction_lock);
+       vm->evicting = false;
+       mutex_unlock(&vm->eviction_lock);
+
+       return 0;
  }
/**
@@ -1555,15 +1559,25 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
        if (!(flags & AMDGPU_PTE_VALID))
                owner = AMDGPU_FENCE_OWNER_KFD;
+ mutex_lock(&vm->eviction_lock);
+       if (vm->evicting) {
+               r = EINPROGRESS;

Not sure about this error code. As far as I can find, this is normally used on non-blocking sockets. I found this explanation: https://stackoverflow.com/questions/8277970/what-are-possible-reason-for-socket-error-einprogress-in-solaris

Quote: "so there is another error for non-blocking connect: EINPROGRESS, which tells you that the operation is in progress and you should check its status later."

This call is neither non-blocking nor is the requested page table update in progress when this error is returned. So I'd think a better error to return here would be EBUSY.

Other than that, this patch is

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>


+               goto error_unlock;
+       }
+
        r = vm->update_funcs->prepare(&params, owner, exclusive);
        if (r)
-               return r;
+               goto error_unlock;
r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
        if (r)
-               return r;
+               goto error_unlock;
- return vm->update_funcs->commit(&params, fence);
+       r = vm->update_funcs->commit(&params, fence);
+
+error_unlock:
+       mutex_unlock(&vm->eviction_lock);
+       return r;
  }
/**
@@ -2522,11 +2536,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
        if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
                return false;
+ /* Try to block ongoing updates */
+       if (!mutex_trylock(&bo_base->vm->eviction_lock))
+               return false;
+
        /* Don't evict VM page tables while they are updated */
        if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
-           !dma_fence_is_signaled(bo_base->vm->last_delayed))
+           !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
+               mutex_unlock(&bo_base->vm->eviction_lock);
                return false;
+       }
+ bo_base->vm->evicting = true;
+       mutex_unlock(&bo_base->vm->eviction_lock);
        return true;
  }
@@ -2773,6 +2795,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        vm->last_direct = dma_fence_get_stub();
        vm->last_delayed = dma_fence_get_stub();
+ mutex_init(&vm->eviction_lock);
+       vm->evicting = false;
+
        amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
        if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
                bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d93ea9ad879e..d5613d184e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -242,6 +242,10 @@ struct amdgpu_vm {
        /* tree of virtual addresses mapped */
        struct rb_root_cached   va;
+ /* Lock to prevent eviction while we are updating page tables */
+       struct mutex            eviction_lock;
+       bool                    evicting;
+
        /* BOs who needs a validation */
        struct list_head        evicted;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to