amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault

Pierre-Eric Pelloux-Prayer Wed, 11 Feb 2026 02:29:27 -0800

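Looking up a VM by PASID and reserving its root BO is tricky to
implement right, and we're going to need it from the devcoredump
code as well, so move it into its own helper.

Note that amdgpu_vm_handle_fault() now calls svm_range_restore_pages()
with the root BO already reserved.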


Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 84 +++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +
 2 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 807f8bcc7de5..6a5b3e148554 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2930,6 +2930,50 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
        return 0;
 }
 
+/**
+ * amdgpu_vm_lock_by_pasid - look up a VM by PASID and reserve its root BO
+ * @adev: amdgpu device pointer
+ * @root: output pointer; on success it holds the referenced and reserved
+ *        root BO of the returned VM
+ * @pasid: PASID of the VM
+ *
+ * Looks up @pasid in adev->vm_manager.pasids, takes a reference on the root
+ * BO of the VM found there and reserves it. The lookup is then repeated to
+ * make sure @pasid still maps to a VM using that same root BO.
+ *
+ * The caller needs to unreserve and unref the root bo on success.
+ *
+ * Return: the VM on success. NULL if no VM (or no root BO) was found for
+ * @pasid, if reserving the root BO failed, or if the second lookup no longer
+ * matched; in the failure cases the reservation and reference taken here are
+ * dropped again before returning.
+ */
+struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
+                                         struct amdgpu_bo **root, u32 pasid)
+{
+       unsigned long irqflags;
+       struct amdgpu_vm *vm;
+       int r;
+
+       xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+       vm = xa_load(&adev->vm_manager.pasids, pasid);
+       *root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
+       xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+
+       if (!*root)
+               return NULL;
+
+       r = amdgpu_bo_reserve(*root, true);
+       if (r)
+               goto error_unref;
+
+       /*
+        * Double check that the VM still exists: the first lookup dropped
+        * the pasids lock before the root BO was reserved, so look the
+        * PASID up again and reject the result if it is gone or if it now
+        * belongs to a VM with a different root BO.
+        */
+       xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+       vm = xa_load(&adev->vm_manager.pasids, pasid);
+       if (vm && vm->root.bo != *root)
+               vm = NULL;
+       xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
+       if (!vm)
+               goto error_unlock;
+
+       return vm;
+error_unlock:
+       amdgpu_bo_unreserve(*root);

+error_unref:
+       amdgpu_bo_unref(root);
+       return NULL;
+}
+
 /**
  * amdgpu_vm_handle_fault - graceful handling of VM faults.
  * @adev: amdgpu device pointer
@@ -2945,50 +2989,31 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, 
struct drm_file *filp)
  * shouldn't be reported any more.
  */
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-                           u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
-                           bool write_fault)
+                           u32 vmid, u32 node_id, uint64_t addr,
+                           uint64_t ts, bool write_fault)
 {
        bool is_compute_context = false;
        struct amdgpu_bo *root;
-       unsigned long irqflags;
        uint64_t value, flags;
        struct amdgpu_vm *vm;
        int r;
 
-       xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
-       vm = xa_load(&adev->vm_manager.pasids, pasid);
-       if (vm) {
-               root = amdgpu_bo_ref(vm->root.bo);
-               is_compute_context = vm->is_compute_context;
-       } else {
-               root = NULL;
-       }
-       xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
-
-       if (!root)
+       vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
+       if (!vm)
                return false;
 
+       is_compute_context = vm->is_compute_context;
+
        addr /= AMDGPU_GPU_PAGE_SIZE;
 
-       if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
-           node_id, addr, ts, write_fault)) {
+       if (is_compute_context &&
+               !svm_range_restore_pages(adev, pasid, vmid, node_id, addr,
+                                       ts, write_fault)) {
+               amdgpu_bo_unreserve(root);
                amdgpu_bo_unref(&root);
                return true;
        }
 
-       r = amdgpu_bo_reserve(root, true);
-       if (r)
-               goto error_unref;
-
-       /* Double check that the VM still exists */
-       xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
-       vm = xa_load(&adev->vm_manager.pasids, pasid);
-       if (vm && vm->root.bo != root)
-               vm = NULL;
-       xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
-       if (!vm)
-               goto error_unlock;
-
        flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
                AMDGPU_PTE_SYSTEM;
 
@@ -3027,7 +3052,6 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
        if (r < 0)
                dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
 
-error_unref:
        amdgpu_bo_unref(&root);
 
        return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 139642eacdd0..2051eda55c99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -589,6 +589,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 
pasid,
                            u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
                            bool write_fault);
 
+struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
+                                         struct amdgpu_bo **root, u32 pasid);
+
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
-- 
2.43.0

[PATCH v1 4/6] drm/amdgpu: extract amdgpu_vm_lock_by_pasid from amdgpu_vm_handle_fault

Reply via email to