Re: [PATCH 4/5] drm/amdgpu: add VM eviction lock v2

2019-12-05 Thread Felix Kuehling

On 2019-12-05 8:39 a.m., Christian König wrote:

This allows to invalidate VM entries without taking the reservation lock.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +++
  2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0d700e8154c4..839d6df394fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -656,7 +656,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  void *param)
  {
struct amdgpu_vm_bo_base *bo_base, *tmp;
-   int r = 0;
+   int r;
  
  	vm->bulk_moveable &= list_empty(>evicted);
  
@@ -665,7 +665,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  
  		r = validate(param, bo);

if (r)
-   break;
+   return r;
  
  		if (bo->tbo.type != ttm_bo_type_kernel) {

amdgpu_vm_bo_moved(bo_base);
@@ -678,7 +678,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
}
}
  
-	return r;

+   mutex_lock(>eviction_lock);
+   vm->evicting = false;
+   mutex_unlock(>eviction_lock);
+
+   return 0;
  }
  
  /**

@@ -1555,15 +1559,25 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD;
  
+	mutex_lock(>eviction_lock);

+   if (vm->evicting) {
+   r = EINPROGRESS;


Not sure about this error code. As far as I can find, this is normally 
used on non-blocking sockets. I found this explanation: 
https://stackoverflow.com/questions/8277970/what-are-possible-reason-for-socket-error-einprogress-in-solaris


Quote: "so there is another error for non-blocking connect: EINPROGRESS, 
which tells you that the operation is in progress and you should check 
its status later."


This call is neither non-blocking nor is the requested page table update 
in progress when this error is returned. So I'd think a better error to 
return here would be EBUSY.


Other than that, this patch is

Reviewed-by: Felix Kuehling 



+   goto error_unlock;
+   }
+
r = vm->update_funcs->prepare(, owner, exclusive);
if (r)
-   return r;
+   goto error_unlock;
  
  	r = amdgpu_vm_update_ptes(, start, last + 1, addr, flags);

if (r)
-   return r;
+   goto error_unlock;
  
-	return vm->update_funcs->commit(, fence);

+   r = vm->update_funcs->commit(, fence);
+
+error_unlock:
+   mutex_unlock(>eviction_lock);
+   return r;
  }
  
  /**

@@ -2522,11 +2536,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
return false;
  
+	/* Try to block ongoing updates */

+   if (!mutex_trylock(_base->vm->eviction_lock))
+   return false;
+
/* Don't evict VM page tables while they are updated */
if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
-   !dma_fence_is_signaled(bo_base->vm->last_delayed))
+   !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
+   mutex_unlock(_base->vm->eviction_lock);
return false;
+   }
  
+	bo_base->vm->evicting = true;

+   mutex_unlock(_base->vm->eviction_lock);
return true;
  }
  
@@ -2773,6 +2795,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,

vm->last_direct = dma_fence_get_stub();
vm->last_delayed = dma_fence_get_stub();
  
+	mutex_init(>eviction_lock);

+   vm->evicting = false;
+
amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, );
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d93ea9ad879e..d5613d184e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -242,6 +242,10 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached   va;
  
+	/* Lock to prevent eviction while we are updating page tables */

+   struct mutexeviction_lock;
+   boolevicting;
+
/* BOs who needs a validation */
struct list_headevicted;
  

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/5] drm/amdgpu: add VM eviction lock v2

2019-12-05 Thread Christian König
This allows to invalidate VM entries without taking the reservation lock.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +++
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0d700e8154c4..839d6df394fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -656,7 +656,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
  void *param)
 {
struct amdgpu_vm_bo_base *bo_base, *tmp;
-   int r = 0;
+   int r;
 
vm->bulk_moveable &= list_empty(>evicted);
 
@@ -665,7 +665,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
 
r = validate(param, bo);
if (r)
-   break;
+   return r;
 
if (bo->tbo.type != ttm_bo_type_kernel) {
amdgpu_vm_bo_moved(bo_base);
@@ -678,7 +678,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
}
}
 
-   return r;
+   mutex_lock(>eviction_lock);
+   vm->evicting = false;
+   mutex_unlock(>eviction_lock);
+
+   return 0;
 }
 
 /**
@@ -1555,15 +1559,25 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD;
 
+   mutex_lock(>eviction_lock);
+   if (vm->evicting) {
+   r = EINPROGRESS;
+   goto error_unlock;
+   }
+
r = vm->update_funcs->prepare(, owner, exclusive);
if (r)
-   return r;
+   goto error_unlock;
 
r = amdgpu_vm_update_ptes(, start, last + 1, addr, flags);
if (r)
-   return r;
+   goto error_unlock;
 
-   return vm->update_funcs->commit(, fence);
+   r = vm->update_funcs->commit(, fence);
+
+error_unlock:
+   mutex_unlock(>eviction_lock);
+   return r;
 }
 
 /**
@@ -2522,11 +2536,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
return false;
 
+   /* Try to block ongoing updates */
+   if (!mutex_trylock(_base->vm->eviction_lock))
+   return false;
+
/* Don't evict VM page tables while they are updated */
if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
-   !dma_fence_is_signaled(bo_base->vm->last_delayed))
+   !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
+   mutex_unlock(_base->vm->eviction_lock);
return false;
+   }
 
+   bo_base->vm->evicting = true;
+   mutex_unlock(_base->vm->eviction_lock);
return true;
 }
 
@@ -2773,6 +2795,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
vm->last_direct = dma_fence_get_stub();
vm->last_delayed = dma_fence_get_stub();
 
+   mutex_init(>eviction_lock);
+   vm->evicting = false;
+
amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, );
if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d93ea9ad879e..d5613d184e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -242,6 +242,10 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached   va;
 
+   /* Lock to prevent eviction while we are updating page tables */
+   struct mutexeviction_lock;
+   boolevicting;
+
/* BOs who needs a validation */
struct list_headevicted;
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx