Integrate the VRAM migration infrastructure into the SVM restore and
boundary realignment paths to ensure correct interaction between
migration and the existing SVM lifecycle framework.

Suppress restore for realigned ranges:
- Add suppress_restore flag to amdgpu_svm_range
- Set flag before svm_restore_realign_boundary() evicts a crossing range
- Prevent migration notifier from enqueueing restore work while
  realignment or VRAM migration is in progress
- Remove existing restore-list entry and drop its queue kref
- On realign failure, clear flag and re-enqueue restore for rollback
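- Enqueue, worker retry, and put_if_dequeued paths respect the flag;
  unmap work is never blocked by it, and put_if_dequeued drops a
  pending restore op instead of requeueing it while the flag is set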

Extend svm_restore_range() with migration support:
- Move amdgpu_svm_range_is_valid() check before map_ctx setup
- Set devmem_only = false since restore should not force VRAM placement
- Add device_private_page_owner for proper devmem identification

Signed-off-by: Junhua Shen <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c   | 76 +++++++++++++++----
 3 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
index 88923e6a0194..31229c4770b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
@@ -728,11 +728,13 @@ amdgpu_svm_range_put_if_dequeued(struct amdgpu_svm *svm,
                list_add_tail(&range->work_node, &svm->gc.list);
                range->queue_state = AMDGPU_SVM_RANGE_IN_GC;
                queue_gc = true;
-       } else if (RESTORE_WORK(range->pending_ops)) {
+       } else if (RESTORE_WORK(range->pending_ops) &&
+                  !READ_ONCE(range->suppress_restore)) {
                list_add_tail(&range->work_node, &svm->restore.list);
                range->queue_state = AMDGPU_SVM_RANGE_IN_RESTORE;
                queue_restore = true;
        } else {
+               range->pending_ops &= ~AMDGPU_SVM_RANGE_OP_RESTORE;
                range->queue_state = AMDGPU_SVM_RANGE_NOT_QUEUED;
                release_kref = true;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h
index 2d677f0965b4..4bf05c304564 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.h
@@ -55,6 +55,7 @@ struct amdgpu_svm_range {
        struct drm_gpusvm_range base;
        struct list_head work_node;
        bool gpu_mapped;
+       bool suppress_restore;
        u8 queue_state;
        u8 pending_ops;
        unsigned long pending_start_page;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
index 8bf00aed7f15..45a611479d1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
@@ -28,6 +28,7 @@
 #include "amdgpu_svm_attr.h"
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_migrate.h"
 
 #include <drm/drm_exec.h>
 #include <drm/drm_gpusvm.h>
@@ -151,6 +152,11 @@ svm_restore_enqueue_work(struct amdgpu_svm *svm,
 
        spin_lock(&svm->work_lock);
 
+       if (!UNMAP_WORK(pending_ops) && READ_ONCE(range->suppress_restore)) {
+               spin_unlock(&svm->work_lock);
+               return;
+       }
+
        /* Deny any work if range is unmapped */
        if (UNMAP_WORK(range->pending_ops)) {
                spin_unlock(&svm->work_lock);
@@ -232,7 +238,7 @@ svm_restore_range(struct amdgpu_svm *svm, struct amdgpu_svm_range *range)
        unsigned long range_start_page;
        int ret;
        struct drm_gpusvm_ctx map_ctx;
-       bool devmem_possible, need_vram_migration;
+       bool devmem_possible;
 
        amdgpu_svm_assert_locked(svm);
 
@@ -247,20 +253,20 @@ svm_restore_range(struct amdgpu_svm *svm, struct amdgpu_svm_range *range)
        if (!attr_range || !amdgpu_svm_attr_has_access(attrs.access))
                return 0;
 
+       if (amdgpu_svm_range_is_valid(svm, range, &attrs))
+               return 0;
+
        devmem_possible = amdgpu_svm_devmem_possible(svm);
-       need_vram_migration = devmem_possible &&
-                             amdgpu_svm_attr_prefer_vram(&attrs);
 
        map_ctx = (struct drm_gpusvm_ctx){
                .read_only = !!(attrs.flags & AMDGPU_SVM_ATTR_BIT_GPU_RO),
                .devmem_possible = devmem_possible,
-               .devmem_only = need_vram_migration,
+               .devmem_only = false,
                .check_pages_threshold = devmem_possible ? SZ_64K : 0,
+               .device_private_page_owner = devmem_possible ?
+                       AMDGPU_PGMAP_OWNER(svm->adev) : NULL,
        };
 
-       if (amdgpu_svm_range_is_valid(svm, range, &attrs))
-               return 0;
-
        AMDGPU_SVM_RANGE_DEBUG(range, "RESTORE - GET PAGES");
 
        ret = amdgpu_svm_range_get_pages(svm, &range->base, &map_ctx);
@@ -289,6 +295,42 @@ static bool amdgpu_svm_nonretryable(int ret)
        }
 }
 
+static void
+svm_restore_suppress_range_restore(struct amdgpu_svm *svm,
+                                  struct amdgpu_svm_range *range)
+{
+       bool release_kref = false;
+
+       spin_lock(&svm->work_lock);
+
+       WRITE_ONCE(range->suppress_restore, true);
+       range->pending_ops &= ~AMDGPU_SVM_RANGE_OP_RESTORE;
+
+       if (range->queue_state == AMDGPU_SVM_RANGE_IN_RESTORE) {
+               list_del_init(&range->work_node);
+               range->queue_state = AMDGPU_SVM_RANGE_NOT_QUEUED;
+               range->pending_start_page = ULONG_MAX;
+               range->pending_last_page = 0;
+               release_kref = true;
+       }
+
+       spin_unlock(&svm->work_lock);
+
+       if (release_kref)
+               drm_gpusvm_range_put(&range->base);
+}
+
+static void
+svm_restore_unsuppress_range_restore(struct amdgpu_svm *svm,
+                                    struct amdgpu_svm_range *range,
+                                    unsigned long start_page,
+                                    unsigned long last_page)
+{
+       WRITE_ONCE(range->suppress_restore, false);
+       svm_restore_enqueue_work(svm, range, start_page, last_page,
+                              AMDGPU_SVM_RANGE_OP_RESTORE);
+}
+
 static void amdgpu_svm_restore_worker(struct work_struct *w)
 {
        struct delayed_work *dwork = to_delayed_work(w);
@@ -334,17 +376,19 @@ static void amdgpu_svm_restore_worker(struct work_struct *w)
                                                 op_ctx.start_page,
                                                 op_ctx.last_page, ret);
                                spin_lock(&svm->work_lock);
-                               if (!UNMAP_WORK(op_ctx.range->pending_ops))
+                               if (!UNMAP_WORK(op_ctx.range->pending_ops) &&
+                                   !READ_ONCE(op_ctx.range->suppress_restore)) {
                                        op_ctx.range->pending_ops |=
                                                AMDGPU_SVM_RANGE_OP_RESTORE;
-                               op_ctx.range->pending_start_page =
-                                       min(op_ctx.range->pending_start_page,
-                                           op_ctx.start_page);
-                               op_ctx.range->pending_last_page =
-                                       max(op_ctx.range->pending_last_page,
-                                           op_ctx.last_page);
+                                       op_ctx.range->pending_start_page =
+                                               min(op_ctx.range->pending_start_page,
+                                                   op_ctx.start_page);
+                                       op_ctx.range->pending_last_page =
+                                               max(op_ctx.range->pending_last_page,
+                                                   op_ctx.last_page);
+                                       need_resched = true;
+                               }
                                spin_unlock(&svm->work_lock);
-                               need_resched = true;
                        } else {
                                AMDGPU_SVM_TRACE(
                                        "restore work drop non retryable 
[0x%lx-0x%lx] ret=%d\n",
@@ -746,6 +790,7 @@ svm_restore_realign_boundary(struct amdgpu_svm *svm,
                rs = drm_gpusvm_range_start(r) >> PAGE_SHIFT;
                rl = (drm_gpusvm_range_end(r) >> PAGE_SHIFT) - 1;
 
+               svm_restore_suppress_range_restore(svm, svm_range);
                amdgpu_svm_range_evict(svm, r);
 
                if (svm_range->gpu_mapped) {
@@ -755,6 +800,7 @@ svm_restore_realign_boundary(struct amdgpu_svm *svm,
                        if (ret < 0) {
                                AMDGPU_SVM_TRACE("zap failed ret=%d [0x%lx-0x%lx]\n",
                                                 ret, rs, rl);
+                               svm_restore_unsuppress_range_restore(svm, svm_range, rs, rl);
                                drm_exec_fini(&exec);
                                return ret;
                        }
-- 
2.34.1

Reply via email to