From: Honglei Huang <[email protected]>

amdgpu_userptr.c:
- svm_restore_find_or_insert_by_attrs / svm_restore_find_or_insert:
  insert empty placeholder drm_gpusvm_ranges over [start, end) using
  attrs from the attr tree (no get_pages, no PTE update).
- svm_restore_realign_boundary: locate the (at most two) crossing
  ranges via O(log N) interval-tree lookups, evict devmem pages,
  zap PTEs and remove the ranges under drm_exec, then re-insert the
  head/tail residue as placeholders and enqueue them to the restore
  worker. If any crossing range was GPU-mapped, join the eviction
  window and report it back so the caller kicks the worker.
- amdgpu_svm_map_attrs_with_restore: one-stop helper that realigns
  the boundary, maps the requested interval with the new attrs,
  and kicks the restore worker if the eviction counter was touched.

amdgpu_userptr.h:
- Declare amdgpu_svm_map_attrs_with_restore() and forward-declare
  struct amdgpu_svm_attrs.

No in-tree caller yet; wired up in the next commit.

Signed-off-by: Honglei Huang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c | 229 ++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h |   5 +
 2 files changed, 234 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
index 34b3b4149..040406a28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
@@ -29,6 +29,7 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 
+#include <drm/drm_exec.h>
 #include <drm/drm_gpusvm.h>
 
 #include <uapi/linux/kfd_ioctl.h>
@@ -477,6 +478,85 @@ svm_restore_map_interval(struct amdgpu_svm *svm,
        return saved_ret;
 }
 
+/*
+ * svm_restore_find_or_insert_by_attrs - cover [start, end) with ranges
+ * @svm: SVM instance the ranges belong to
+ * @attrs: attributes used to build the drm_gpusvm context for the lookup
+ * @start: first byte address of the interval
+ * @end: exclusive end byte address of the interval
+ *
+ * Walks [start, end) and calls amdgpu_svm_range_find_or_insert() at each
+ * position, then advances to the end of the range it returned. This
+ * function only looks up or creates ranges; it makes no get_pages or
+ * PTE-update call itself.
+ *
+ * Return: 0 on success, the PTR_ERR() of a failed lookup/insert, or -EINVAL
+ * if a returned range does not extend past the current address.
+ */
+static int
+svm_restore_find_or_insert_by_attrs(struct amdgpu_svm *svm,
+                         const struct amdgpu_svm_attrs *attrs,
+                         unsigned long start, unsigned long end)
+{
+       unsigned long addr = start;
+       bool devmem_possible = amdgpu_svm_attr_devmem_possible(svm, attrs);
+       bool need_vram_migration = amdgpu_svm_attr_prefer_vram(svm, attrs);
+       struct drm_gpusvm_ctx map_ctx = {
+               .read_only = !!(attrs->flags & AMDGPU_SVM_ATTR_BIT_GPU_RO),
+               .devmem_possible = devmem_possible,
+               .devmem_only = need_vram_migration && devmem_possible,
+               .check_pages_threshold = devmem_possible ? SZ_64K : 0,
+       };
+
+       while (addr < end) {
+               struct amdgpu_svm_range *range;
+               unsigned long next_addr;
+
+               range = amdgpu_svm_range_find_or_insert(svm, addr, addr, end,
+                                                       &map_ctx);
+               if (IS_ERR(range)) {
+                       AMDGPU_SVM_ERR("find_or_insert failed at 0x%lx [0x%lx-0x%lx) ret=%ld\n",
+                                      addr, start, end, PTR_ERR(range));
+                       return PTR_ERR(range);
+               }
+
+               /* Refuse to loop forever if the range does not move us forward. */
+               next_addr = drm_gpusvm_range_end(&range->base);
+               if (next_addr <= addr)
+                       return -EINVAL;
+
+               addr = next_addr;
+       }
+
+       return 0;
+}
+
+/*
+ * svm_restore_find_or_insert - insert ranges over a page interval
+ * @svm: SVM instance; must be locked (checked by amdgpu_svm_assert_locked())
+ * @start_page: first page of the interval
+ * @last_page: last page of the interval (inclusive)
+ *
+ * Splits [start_page, last_page] into segments using the bounds reported by
+ * amdgpu_svm_attr_get_bounds_locked(). For every segment that has an attr
+ * range whose access passes amdgpu_svm_attr_has_access(), ranges are looked
+ * up or inserted with that segment's attributes. Other segments are skipped.
+ *
+ * Return: 0 on success or a negative error code.
+ */
+static int
+svm_restore_find_or_insert(struct amdgpu_svm *svm,
+                          unsigned long start_page,
+                          unsigned long last_page)
+{
+       struct amdgpu_svm_attr_tree *attr_tree = svm->attr_tree;
+       unsigned long cursor = start_page;
+
+       amdgpu_svm_assert_locked(svm);
+
+       while (cursor <= last_page) {
+               struct amdgpu_svm_attr_range *attr_range;
+               struct amdgpu_svm_attrs attrs;
+               /*
+                * Default to a one-page segment so the walk stays well defined
+                * even if the lookup leaves the bounds untouched on a miss.
+                */
+               unsigned long seg_start = cursor, seg_last = cursor;
+               int ret;
+
+               /* Copy the attrs out so they can be used after unlocking. */
+               mutex_lock(&attr_tree->lock);
+               attr_range = amdgpu_svm_attr_get_bounds_locked(attr_tree, cursor,
+                                                              &seg_start, &seg_last);
+               if (attr_range)
+                       attrs = attr_range->attrs;
+               mutex_unlock(&attr_tree->lock);
+
+               seg_last = min(seg_last, last_page);
+               /* Bail out rather than walk backwards on inconsistent bounds. */
+               if (seg_last < cursor)
+                       return -EINVAL;
+
+               if (attr_range && amdgpu_svm_attr_has_access(attrs.access)) {
+                       ret = svm_restore_find_or_insert_by_attrs(svm, &attrs,
+                                       cursor << PAGE_SHIFT,
+                                       (seg_last + 1) << PAGE_SHIFT);
+                       if (ret)
+                               return ret;
+               }
+
+               if (seg_last >= last_page)
+                       break;
+               cursor = seg_last + 1;
+       }
+
+       return 0;
+}
+
 static void
 svm_restore_enqueue_unmapped(struct amdgpu_svm *svm,
                             unsigned long start_page,
@@ -566,6 +646,155 @@ void amdgpu_svm_restore_gc_work_func(struct work_struct *w)
                                    msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 }
 
+/*
+ * Re-insert placeholder ranges over [first_page, last_page] and hand the
+ * interval to the restore worker. An insert failure is only traced: the
+ * interval is still enqueued, since restoring it is the worker's job.
+ */
+static void
+svm_restore_reinsert_residue(struct amdgpu_svm *svm,
+                            unsigned long first_page,
+                            unsigned long last_page)
+{
+       int ret;
+
+       ret = svm_restore_find_or_insert(svm, first_page, last_page);
+       if (ret)
+               AMDGPU_SVM_TRACE("residue insert failed ret=%d [0x%lx-0x%lx]\n",
+                                ret, first_page, last_page);
+
+       svm_restore_enqueue_unmapped(svm, first_page, last_page);
+}
+
+/*
+ * A range cannot be split, so when the user sets new attributes over part of
+ * an existing range, up to two ranges may straddle the boundaries of the new
+ * attribute interval. The crossing ranges are destroyed here and the parts
+ * lying outside the interval are rebuilt against the new attr boundary.
+ * See below:
+ * [old attr] [new attr] [old attr]
+ * [     range1  ][   range 2     ]
+ *
+ * A single range may also straddle both boundaries; it then leaves residue
+ * on both sides of the interval.
+ *
+ * @svm: SVM instance; must be locked (checked by amdgpu_svm_assert_locked())
+ * @start_page: first page of the new attribute interval
+ * @last_page: last page of the new attribute interval (inclusive)
+ * @eviction_taken: set to true when svm_restore_eviction_begin() was called
+ *                  because a crossing range was GPU-mapped. It stays true
+ *                  even when an error is returned afterwards.
+ *
+ * Return: 0 on success or a negative error code from locking or zapping.
+ */
+static int
+svm_restore_realign_boundary(struct amdgpu_svm *svm,
+                            unsigned long start_page,
+                            unsigned long last_page,
+                            bool *eviction_taken)
+{
+       unsigned long start = start_page << PAGE_SHIFT;
+       unsigned long end = (last_page + 1) << PAGE_SHIFT;
+       unsigned long head_start_page = start_page;
+       unsigned long tail_last_page = last_page;
+       struct drm_gpusvm_ctx ctx = { .in_notifier = false };
+       struct drm_gpusvm_notifier *notifier;
+       struct drm_gpusvm_range *head = NULL, *tail = NULL;
+       struct drm_gpusvm_range *crossings[2];
+       struct drm_exec exec;
+       bool needs_flush = false;
+       unsigned int i;
+       int err = 0;
+       int ret;
+
+       amdgpu_svm_assert_locked(svm);
+
+       *eviction_taken = false;
+
+       /* Head: the range containing @start that begins before it. */
+       notifier = drm_gpusvm_notifier_find(&svm->gpusvm, start, start + 1);
+       if (notifier) {
+               head = drm_gpusvm_range_find(notifier, start, start + 1);
+               if (head && drm_gpusvm_range_start(head) >= start)
+                       head = NULL;
+       }
+
+       /*
+        * Tail: the range containing the last byte that extends past @end.
+        * If it is the same range as the head it is handled once, as head.
+        */
+       notifier = drm_gpusvm_notifier_find(&svm->gpusvm, end - 1, end);
+       if (notifier) {
+               tail = drm_gpusvm_range_find(notifier, end - 1, end);
+               if (tail == head)
+                       tail = NULL;
+               else if (tail && drm_gpusvm_range_end(tail) <= end)
+                       tail = NULL;
+       }
+
+       if (!head && !tail)
+               return 0;
+
+       if ((head && READ_ONCE(to_amdgpu_svm_range(head)->gpu_mapped)) ||
+           (tail && READ_ONCE(to_amdgpu_svm_range(tail)->gpu_mapped))) {
+               /*
+                * A crossing range is GPU-mapped: join the eviction window
+                * and tell the caller so that it kicks the restore worker.
+                */
+               svm_restore_eviction_begin(svm);
+               *eviction_taken = true;
+       }
+
+       ret = amdgpu_svm_range_lock_vm_pd(svm, &exec, true);
+       if (ret)
+               return ret;
+
+       crossings[0] = head;
+       crossings[1] = tail;
+
+       for (i = 0; i < ARRAY_SIZE(crossings); i++) {
+               struct drm_gpusvm_range *r = crossings[i];
+               struct amdgpu_svm_range *svm_range;
+               unsigned long rs, rl;
+
+               if (!r)
+                       continue;
+
+               /* Take the bounds now; the range is gone after the remove. */
+               svm_range = to_amdgpu_svm_range(r);
+               rs = drm_gpusvm_range_start(r) >> PAGE_SHIFT;
+               rl = (drm_gpusvm_range_end(r) >> PAGE_SHIFT) - 1;
+
+               amdgpu_svm_range_evict(svm, r);
+
+               if (READ_ONCE(svm_range->gpu_mapped)) {
+                       AMDGPU_SVM_RANGE_DEBUG(svm_range, "ATTR DESTROY");
+
+                       ret = amdgpu_svm_range_zap_ptes(svm, svm_range, rs, rl);
+                       if (ret < 0) {
+                               AMDGPU_SVM_TRACE("zap failed ret=%d [0x%lx-0x%lx]\n",
+                                                ret, rs, rl);
+                               /*
+                                * Leave through the common exit so that an
+                                * earlier zap is still flushed and an already
+                                * removed range still gets its residue back.
+                                */
+                               err = ret;
+                               break;
+                       }
+                       needs_flush = true;
+               }
+
+               amdgpu_svm_range_remove(svm, svm_range, &ctx);
+
+               /*
+                * Record residue only for ranges that were really removed.
+                * Checking both sides covers a single range that straddles
+                * both boundaries of the interval.
+                */
+               if (rs < start_page)
+                       head_start_page = rs;
+               if (rl > last_page)
+                       tail_last_page = rl;
+       }
+
+       drm_exec_fini(&exec);
+
+       if (needs_flush)
+               svm->flush_tlb(svm);
+
+       /*
+        * Only insert the ranges here and queue them to the restore worker.
+        * Getting pages may fail and need a retry, so the restore is done
+        * asynchronously in the worker. This keeps retry logic out of the
+        * ioctl path and avoids resume synchronization between the ioctl
+        * and the worker.
+        */
+       if (head_start_page < start_page)
+               svm_restore_reinsert_residue(svm, head_start_page,
+                                            start_page - 1);
+       if (tail_last_page > last_page)
+               svm_restore_reinsert_residue(svm, last_page + 1,
+                                            tail_last_page);
+
+       return err;
+}
+
+/*
+ * amdgpu_svm_map_attrs_with_restore - apply new attrs to a page interval
+ * @svm: SVM instance
+ * @new_attrs: attributes to map the interval with
+ * @start_page: first page of the interval
+ * @last_page: last page of the interval (inclusive)
+ *
+ * First realigns any range crossing the interval boundaries, then, if that
+ * succeeded, maps [start_page, last_page] with @new_attrs. Whenever the
+ * realign step reports that it took an eviction, the restore work is
+ * rescheduled with zero delay, on success and on failure alike.
+ *
+ * Return: 0 on success or the negative error code of the failing step.
+ */
+int
+amdgpu_svm_map_attrs_with_restore(struct amdgpu_svm *svm,
+                                 const struct amdgpu_svm_attrs *new_attrs,
+                                 unsigned long start_page,
+                                 unsigned long last_page)
+{
+       const unsigned long map_start = start_page << PAGE_SHIFT;
+       const unsigned long map_end = (last_page + 1) << PAGE_SHIFT;
+       bool kick_worker = false;
+       int err;
+
+       err = svm_restore_realign_boundary(svm, start_page, last_page,
+                                          &kick_worker);
+       if (!err)
+               err = amdgpu_svm_range_map_attrs(svm, new_attrs,
+                                                map_start, map_end);
+
+       if (kick_worker)
+               mod_delayed_work(svm->restore.wq, &svm->restore.work, 0);
+
+       return err;
+}
+
+
 void amdgpu_svm_quiesce_compute(struct amdgpu_svm *svm)
 {
        struct mm_struct *mm = svm->gpusvm.mm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h
index a8f76f785..a769f4915 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h
@@ -59,5 +59,10 @@ void amdgpu_svm_restore_invalidate(struct amdgpu_svm *svm,
                                   uint64_t adj_start, uint64_t adj_end);
 void amdgpu_svm_quiesce_compute(struct amdgpu_svm *svm);
 void amdgpu_svm_resume_compute(struct amdgpu_svm *svm);
+struct amdgpu_svm_attrs;
+/*
+ * Realign ranges crossing [start_page, last_page], map that interval with
+ * new_attrs, and reschedule the restore work if an eviction was taken.
+ * Returns 0 on success or a negative error code.
+ */
+int amdgpu_svm_map_attrs_with_restore(struct amdgpu_svm *svm,
+                                     const struct amdgpu_svm_attrs *new_attrs,
+                                     unsigned long start_page,
+                                     unsigned long last_page);
 
 #endif /* __AMDGPU_USERPTR_H__ */
-- 
2.34.1

Reply via email to