From: Honglei Huang <[email protected]>

amdgpu_userptr.c:
- svm_restore_find_or_insert_by_attrs / svm_restore_find_or_insert:
  insert empty placeholder drm_gpusvm_ranges over [start, end) using
  attrs from the attr tree (no get_pages, no PTE update).
- svm_restore_realign_boundary: locate the (at most two) crossing
  ranges via O(log N) interval-tree lookups, evict devmem pages, zap
  PTEs and remove the ranges under drm_exec, then re-insert the
  head/tail residue as placeholders and enqueue them to the restore
  worker. If any crossing range was GPU-mapped, join the eviction
  window and report it back so the caller kicks the worker. A zap
  failure still flushes the TLB for a crossing range that was already
  zapped.
- amdgpu_svm_map_attrs_with_restore: one-stop helper that realigns
  the boundary, maps the requested interval with the new attrs, and
  kicks the restore worker if the eviction counter was touched.

amdgpu_userptr.h:
- Declare amdgpu_svm_map_attrs_with_restore() and forward-declare
  struct amdgpu_svm_attrs.

No in-tree caller yet; wired up in the next commit.

Signed-off-by: Honglei Huang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c | 266 ++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h |   5 +
 2 files changed, 271 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
index 34b3b4149..040406a28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c
@@ -29,6 +29,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
+#include <drm/drm_exec.h>
 #include <drm/drm_gpusvm.h>
 #include <uapi/linux/kfd_ioctl.h>
 
@@ -477,6 +478,101 @@ svm_restore_map_interval(struct amdgpu_svm *svm,
 	return saved_ret;
 }
 
+/*
+ * Make sure every address in [start, end) is covered by a range, using a
+ * gpusvm context derived from @attrs. This only looks up or inserts ranges
+ * through amdgpu_svm_range_find_or_insert(); it does not get pages or
+ * update PTEs itself.
+ *
+ * Returns 0 on success, the error from amdgpu_svm_range_find_or_insert(),
+ * or -EINVAL if a returned range does not end beyond the current address.
+ */
+static int
+svm_restore_find_or_insert_by_attrs(struct amdgpu_svm *svm,
+				    const struct amdgpu_svm_attrs *attrs,
+				    unsigned long start, unsigned long end)
+{
+	unsigned long addr = start;
+	bool devmem_possible = amdgpu_svm_attr_devmem_possible(svm, attrs);
+	bool need_vram_migration = amdgpu_svm_attr_prefer_vram(svm, attrs);
+	struct drm_gpusvm_ctx map_ctx = {
+		.read_only = !!(attrs->flags & AMDGPU_SVM_ATTR_BIT_GPU_RO),
+		.devmem_possible = devmem_possible,
+		.devmem_only = need_vram_migration && devmem_possible,
+		.check_pages_threshold = devmem_possible ? SZ_64K : 0,
+	};
+
+	while (addr < end) {
+		struct amdgpu_svm_range *range;
+		unsigned long next_addr;
+
+		range = amdgpu_svm_range_find_or_insert(svm, addr, addr, end,
+							&map_ctx);
+		if (IS_ERR(range)) {
+			AMDGPU_SVM_ERR("find_or_insert failed at 0x%lx [0x%lx-0x%lx) ret=%ld\n",
+				       addr, start, end, PTR_ERR(range));
+			return PTR_ERR(range);
+		}
+
+		next_addr = drm_gpusvm_range_end(&range->base);
+		if (next_addr <= addr)
+			return -EINVAL;
+
+		addr = next_addr;
+	}
+
+	return 0;
+}
+
+/*
+ * Insert placeholder ranges over the pages [start_page, last_page], one
+ * attr-tree segment at a time. Segments without an attr range, or whose
+ * attrs grant no access, are skipped. The attrs are copied while holding
+ * attr_tree->lock so the lock is dropped before ranges are inserted.
+ *
+ * Returns 0 on success or the first error from
+ * svm_restore_find_or_insert_by_attrs().
+ */
+static int
+svm_restore_find_or_insert(struct amdgpu_svm *svm,
+			   unsigned long start_page,
+			   unsigned long last_page)
+{
+	struct amdgpu_svm_attr_tree *attr_tree = svm->attr_tree;
+	unsigned long cursor = start_page;
+	int ret;
+
+	amdgpu_svm_assert_locked(svm);
+
+	while (cursor <= last_page) {
+		struct amdgpu_svm_attr_range *attr_range;
+		struct amdgpu_svm_attrs attrs;
+		unsigned long seg_start, seg_last;
+
+		mutex_lock(&attr_tree->lock);
+		attr_range = amdgpu_svm_attr_get_bounds_locked(attr_tree, cursor,
+							       &seg_start, &seg_last);
+		if (attr_range)
+			attrs = attr_range->attrs;
+		mutex_unlock(&attr_tree->lock);
+
+		seg_last = min(seg_last, last_page);
+		if (attr_range && amdgpu_svm_attr_has_access(attrs.access)) {
+			ret = svm_restore_find_or_insert_by_attrs(svm, &attrs,
+								  cursor << PAGE_SHIFT,
+								  (seg_last + 1) << PAGE_SHIFT);
+			if (ret)
+				return ret;
+		}
+
+		if (seg_last >= last_page)
+			break;
+		cursor = seg_last + 1;
+	}
+
+	return 0;
+}
+
 static void
 svm_restore_enqueue_unmapped(struct amdgpu_svm *svm,
 			     unsigned long start_page,
@@ -566,6 +662,176 @@ void amdgpu_svm_restore_gc_work_func(struct work_struct *w)
 			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
 }
 
+/*
+ * Because a range cannot be split, when the user sets a new attribute over
+ * an existing range it may result in at most two crossing ranges that
+ * straddle the new boundary. The crossing ranges need to be destroyed and
+ * rebuilt with the new attr boundary.
+ * See below:
+ *   [old attr][   new attr   ][old attr]
+ *       [  range 1   ][  range 2   ]
+ *
+ * @eviction_taken is set to true when a crossing range was GPU-mapped and
+ * svm_restore_eviction_begin() has been called; it stays set on the error
+ * paths after that point.
+ *
+ * Returns 0 on success or the error from the lock or zap step.
+ */
+static int
+svm_restore_realign_boundary(struct amdgpu_svm *svm,
+			     unsigned long start_page,
+			     unsigned long last_page,
+			     bool *eviction_taken)
+{
+	unsigned long start = start_page << PAGE_SHIFT;
+	unsigned long end = (last_page + 1) << PAGE_SHIFT;
+	unsigned long head_start_page = start_page;
+	unsigned long tail_last_page = last_page;
+	struct drm_gpusvm_ctx ctx = { .in_notifier = false };
+	struct drm_gpusvm_notifier *notifier;
+	struct drm_gpusvm_range *head = NULL, *tail = NULL;
+	struct drm_gpusvm_range *crossings[2];
+	struct drm_exec exec;
+	bool needs_flush = false;
+	int i, ret;
+
+	amdgpu_svm_assert_locked(svm);
+
+	*eviction_taken = false;
+
+	notifier = drm_gpusvm_notifier_find(&svm->gpusvm, start, start + 1);
+	if (notifier) {
+		head = drm_gpusvm_range_find(notifier, start, start + 1);
+		if (head && drm_gpusvm_range_start(head) >= start)
+			head = NULL;
+	}
+
+	notifier = drm_gpusvm_notifier_find(&svm->gpusvm, end - 1, end);
+	if (notifier) {
+		tail = drm_gpusvm_range_find(notifier, end - 1, end);
+		if (tail == head)
+			tail = NULL;
+		else if (tail && drm_gpusvm_range_end(tail) <= end)
+			tail = NULL;
+	}
+
+	if (!head && !tail)
+		return 0;
+
+	if (head)
+		head_start_page = drm_gpusvm_range_start(head) >> PAGE_SHIFT;
+	if (tail)
+		tail_last_page = (drm_gpusvm_range_end(tail) >> PAGE_SHIFT) - 1;
+
+	if ((head && READ_ONCE(to_amdgpu_svm_range(head)->gpu_mapped)) ||
+	    (tail && READ_ONCE(to_amdgpu_svm_range(tail)->gpu_mapped))) {
+		/* Join the eviction window if any crossing range is GPU-mapped */
+		svm_restore_eviction_begin(svm);
+		*eviction_taken = true;
+	}
+
+	ret = amdgpu_svm_range_lock_vm_pd(svm, &exec, true);
+	if (ret)
+		return ret;
+
+	crossings[0] = head;
+	crossings[1] = tail;
+
+	for (i = 0; i < ARRAY_SIZE(crossings); i++) {
+		struct drm_gpusvm_range *r = crossings[i];
+		struct amdgpu_svm_range *svm_range;
+		unsigned long rs, rl;
+
+		if (!r)
+			continue;
+
+		svm_range = to_amdgpu_svm_range(r);
+		rs = drm_gpusvm_range_start(r) >> PAGE_SHIFT;
+		rl = (drm_gpusvm_range_end(r) >> PAGE_SHIFT) - 1;
+
+		amdgpu_svm_range_evict(svm, r);
+
+		if (svm_range->gpu_mapped) {
+			AMDGPU_SVM_RANGE_DEBUG(svm_range, "ATTR DESTROY");
+
+			ret = amdgpu_svm_range_zap_ptes(svm, svm_range, rs, rl);
+			if (ret < 0) {
+				AMDGPU_SVM_TRACE("zap failed ret=%d [0x%lx-0x%lx]\n",
+						 ret, rs, rl);
+				break;
+			}
+			needs_flush = true;
+		}
+
+		amdgpu_svm_range_remove(svm, svm_range, &ctx);
+	}
+
+	drm_exec_fini(&exec);
+
+	/*
+	 * Flush even when a zap failed: PTEs of an earlier crossing range may
+	 * already have been zapped and that range removed.
+	 */
+	if (needs_flush)
+		svm->flush_tlb(svm);
+
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Only insert placeholder ranges here and hand the residue to the
+	 * restore worker: getting pages may fail and need a retry, so the
+	 * restore is done asynchronously to keep retry and resume
+	 * synchronization logic out of the ioctl path. The insert result is
+	 * not checked; the residue is enqueued to the worker either way.
+	 */
+	if (head_start_page < start_page) {
+		svm_restore_find_or_insert(svm, head_start_page, start_page - 1);
+		svm_restore_enqueue_unmapped(svm, head_start_page, start_page - 1);
+	}
+	if (tail_last_page > last_page) {
+		svm_restore_find_or_insert(svm, last_page + 1, tail_last_page);
+		svm_restore_enqueue_unmapped(svm, last_page + 1, tail_last_page);
+	}
+
+	return 0;
+}
+
+/*
+ * Realign any range crossing the [start_page, last_page] boundary, then map
+ * the interval with @new_attrs. If the realign step reported that the
+ * eviction window was joined, the restore worker is kicked immediately,
+ * on both the success and the error path.
+ *
+ * Returns 0 on success or the error from the realign or map step.
+ */
+int
+amdgpu_svm_map_attrs_with_restore(struct amdgpu_svm *svm,
+				  const struct amdgpu_svm_attrs *new_attrs,
+				  unsigned long start_page,
+				  unsigned long last_page)
+{
+	bool eviction_taken = false;
+	int ret;
+
+	/* Destroy and re-queue ranges that straddle the new boundary */
+	ret = svm_restore_realign_boundary(svm, start_page, last_page,
+					   &eviction_taken);
+	if (ret)
+		goto out;
+
+	/* Map the user requested interval with the new attributes */
+	ret = amdgpu_svm_range_map_attrs(svm, new_attrs,
+					 start_page << PAGE_SHIFT,
+					 (last_page + 1) << PAGE_SHIFT);
+
+out:
+	if (eviction_taken)
+		mod_delayed_work(svm->restore.wq, &svm->restore.work, 0);
+
+	return ret;
+}
+
 void amdgpu_svm_quiesce_compute(struct amdgpu_svm *svm)
 {
 	struct mm_struct *mm = svm->gpusvm.mm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h
index a8f76f785..a769f4915 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.h
@@ -59,5 +59,10 @@ void amdgpu_svm_restore_invalidate(struct amdgpu_svm *svm,
 				   uint64_t adj_start, uint64_t adj_end);
 void amdgpu_svm_quiesce_compute(struct amdgpu_svm *svm);
 void amdgpu_svm_resume_compute(struct amdgpu_svm *svm);
+struct amdgpu_svm_attrs;
+int amdgpu_svm_map_attrs_with_restore(struct amdgpu_svm *svm,
+				      const struct amdgpu_svm_attrs *new_attrs,
+				      unsigned long start_page,
+				      unsigned long last_page);
 
 #endif /* __AMDGPU_USERPTR_H__ */
-- 
2.34.1
