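Looking up a VM by its PASID and reserving its root BO is tricky to implement right, and we're going to need it from the devcoredump code as well. Move that logic out of amdgpu_vm_handle_fault() into a new helper, amdgpu_vm_lock_by_pasid().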
Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 84 +++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 + 2 files changed, 57 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 807f8bcc7de5..6a5b3e148554 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2930,6 +2930,50 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return 0; } +/** + * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible. + * @adev: amdgpu device pointer + * @root: root BO of the VM + * @pasid: PASID of the VM + * The caller needs to unreserve and unref the root bo on success. + */ +struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev, + struct amdgpu_bo **root, u32 pasid) +{ + unsigned long irqflags; + struct amdgpu_vm *vm; + int r; + + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + *root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL; + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); + + if (!*root) + return NULL; + + r = amdgpu_bo_reserve(*root, true); + if (r) + goto error_unref; + + /* Double check that the VM still exists */ + xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + if (vm && vm->root.bo != *root) + vm = NULL; + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); + if (!vm) + goto error_unlock; + + return vm; +error_unlock: + amdgpu_bo_unreserve(*root); + +error_unref: + amdgpu_bo_unref(root); + return NULL; +} + /** * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer @@ -2945,50 +2989,31 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) * shouldn't be reported any more. 
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, - bool write_fault) + u32 vmid, u32 node_id, uint64_t addr, + uint64_t ts, bool write_fault) { bool is_compute_context = false; struct amdgpu_bo *root; - unsigned long irqflags; uint64_t value, flags; struct amdgpu_vm *vm; int r; - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm) { - root = amdgpu_bo_ref(vm->root.bo); - is_compute_context = vm->is_compute_context; - } else { - root = NULL; - } - xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); - - if (!root) + vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); + if (!vm) return false; + is_compute_context = vm->is_compute_context; + addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, - node_id, addr, ts, write_fault)) { + if (is_compute_context && + !svm_range_restore_pages(adev, pasid, vmid, node_id, addr, + ts, write_fault)) { + amdgpu_bo_unreserve(root); amdgpu_bo_unref(&root); return true; } - r = amdgpu_bo_reserve(root, true); - if (r) - goto error_unref; - - /* Double check that the VM still exists */ - xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm && vm->root.bo != root) - vm = NULL; - xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); - if (!vm) - goto error_unlock; - flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; @@ -3027,7 +3052,6 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, if (r < 0) dev_err(adev->dev, "Can't handle page fault (%d)\n", r); -error_unref: amdgpu_bo_unref(&root); return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 139642eacdd0..2051eda55c99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -589,6 +589,9 @@ bool 
amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, u32 vmid, u32 node_id, uint64_t addr, uint64_t ts, bool write_fault); +struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev, + struct amdgpu_bo **root, u32 pasid); + void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, -- 2.43.0
