From: Honglei Huang <[email protected]> Implement the garbage collection work function for xnack-off SVM ranges. When a range is fully unmapped, the GC worker removes it. When a range is partially unmapped, it removes the stale range and synchronously rebuilds the surviving sub-region via svm_restore_map_interval(). The partial-unmap path evicts VRAM-backed pages back to sysmem via amdgpu_svm_range_evict() (the devmem-aware wrapper) before removal, so live data is preserved.
In svm_restore_map_interval(), cache vma->vm_start / vma->vm_end into local variables before dropping mmap_read_lock so that subsequent find_vma() and loop condition checks use the cached values rather than dereferencing a potentially stale vma pointer. The per-iteration cursor advance is simplified to "if (seg_last >= last_page) break; cursor = seg_last + 1;" -- attr_get_bounds() returns the segment containing cursor, so seg_start <= cursor and the explicit ULONG_MAX guard is redundant. Permanent (non-retryable) errors from amdgpu_svm_range_map_attrs() are classified and skipped with a trace log rather than retried: -ENOENT, -EFAULT and -EPERM come from hmm_range_fault() and reflect VMA state that the worker cannot fix (no VMA, non-faultable mapping, RO VMA written), -EINVAL reflects a sanity-check failure that will not change on retry, and -EHWPOISON reflects physical page corruption. All other non-zero returns (e.g. -EBUSY, -EAGAIN, -ETIME, -ENOMEM) are treated as transient and saved for the worker to retry. The helper is named svm_restore_enqueue_unmapped() (rather than "requeue") to reflect that it inserts ranges into the restore queue from both the partial-unmap fallback path here and the attr-change realignment path added later. Signed-off-by: Honglei Huang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c | 197 ++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c index 89e8b687b..0b02008be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c @@ -369,6 +369,203 @@ static void amdgpu_svm_restore_worker(struct work_struct *w) queue_delayed_work(svm->restore.wq, &svm->restore.work, resched_delay); } + +/* + * The interval may straddle VMA holes, i.e. regions with no VMA backing. + * + * 1. 
Walk mm's VMA tree with find_vma_intersection() and skip the + * holes between VMAs entirely; map_attrs() would just return + * -EFAULT on a hole. + * 2. For each VMA-clipped chunk, call amdgpu_svm_range_map_attrs(). + * Errors that the worker has no way to recover from (-ENOENT, + * -EFAULT, -EPERM, -EINVAL, -EHWPOISON) are classified as + * permanent: trace and skip the chunk, then continue with the + * next VMA so a single bad page/VMA does not abort the rebuild + * of the remaining interval. + */ +static int +svm_restore_map_attr_segment(struct amdgpu_svm *svm, + struct mm_struct *mm, + const struct amdgpu_svm_attrs *attrs, + unsigned long start, + unsigned long end) +{ + unsigned long pos = start; + int saved_ret = 0; + struct vm_area_struct *vma; + unsigned long chunk_start, chunk_end; + int ret; + + amdgpu_svm_assert_locked(svm); + + while (pos < end) { + mmap_read_lock(mm); + vma = find_vma_intersection(mm, pos, end); + if (vma) { + chunk_start = max(vma->vm_start, pos); + chunk_end = min(vma->vm_end, end); + } + mmap_read_unlock(mm); /* vma may go stale now; use cached bounds */ + + if (!vma) + break; + + ret = amdgpu_svm_range_map_attrs(svm, attrs, + chunk_start, chunk_end); + if (ret == -ENOENT || ret == -EFAULT || + ret == -EPERM || ret == -EINVAL || + ret == -EHWPOISON) { + AMDGPU_SVM_TRACE("%s skip permanent [0x%lx-0x%lx) ret=%d\n", + __func__, chunk_start, chunk_end, ret); + } else if (ret) { + AMDGPU_SVM_ERR("%s failed [0x%lx-0x%lx) ret=%d\n", + __func__, chunk_start, chunk_end, ret); + saved_ret = ret; /* not permanent: report to caller */ + } + + pos = chunk_end; + } + + return saved_ret; +} + +static int +svm_restore_map_interval(struct amdgpu_svm *svm, + unsigned long start_page, + unsigned long last_page) +{ + struct amdgpu_svm_attr_tree *attr_tree = svm->attr_tree; + struct mm_struct *mm = svm->gpusvm.mm; + unsigned long cursor = start_page; + int saved_ret = 0; + int ret; + + amdgpu_svm_assert_locked(svm); + + if (!mmget_not_zero(mm)) + return -ESRCH; + + while (cursor <= last_page) { + struct amdgpu_svm_attr_range 
*attr_range; + struct amdgpu_svm_attrs attrs; + unsigned long seg_start, seg_last; + + mutex_lock(&attr_tree->lock); + attr_range = amdgpu_svm_attr_get_bounds_locked(attr_tree, cursor, + &seg_start, &seg_last); + if (attr_range) + attrs = attr_range->attrs; + mutex_unlock(&attr_tree->lock); + + seg_last = min(seg_last, last_page); + if (attr_range && amdgpu_svm_attr_has_access(attrs.access)) { + + ret = svm_restore_map_attr_segment(svm, mm, &attrs, + cursor << PAGE_SHIFT, + (seg_last + 1) << PAGE_SHIFT); + if (ret) + saved_ret = ret; + } + + if (seg_last >= last_page) + break; + cursor = seg_last + 1; + } + + mmput(mm); + + return saved_ret; +} + +static void +svm_restore_enqueue_unmapped(struct amdgpu_svm *svm, + unsigned long start_page, + unsigned long last_page) +{ + struct drm_gpusvm_notifier *notifier; + struct drm_gpusvm_range *r; + unsigned long start = start_page << PAGE_SHIFT; + unsigned long end = (last_page + 1) << PAGE_SHIFT; + + amdgpu_svm_assert_locked(svm); + + drm_gpusvm_for_each_notifier(notifier, &svm->gpusvm, start, end) { + r = NULL; + drm_gpusvm_for_each_range(r, notifier, start, end) { + struct amdgpu_svm_range *svm_range = + to_amdgpu_svm_range(r); + unsigned long rs, rl; + + if (READ_ONCE(svm_range->gpu_mapped)) + continue; + + rs = drm_gpusvm_range_start(r) >> PAGE_SHIFT; + rl = (drm_gpusvm_range_end(r) >> PAGE_SHIFT) - 1; + + svm_restore_enqueue_work(svm, svm_range, rs, rl, + AMDGPU_SVM_RANGE_OP_RESTORE); + } + } +} + +void amdgpu_svm_restore_gc_work_func(struct work_struct *w) +{ + struct amdgpu_svm_gc *gc = container_of(w, struct amdgpu_svm_gc, work); + struct amdgpu_svm *svm = container_of(gc, struct amdgpu_svm, gc); + struct amdgpu_svm_range_op_ctx op_ctx; + struct drm_gpusvm_ctx ctx = { .in_notifier = false }; + unsigned long range_start_page; + unsigned long range_last_page; + bool partial; + int ret; + + spin_lock(&svm->work_lock); + while (amdgpu_svm_range_dequeue_locked(svm, &svm->gc.list, + &op_ctx)) { + 
spin_unlock(&svm->work_lock); + range_start_page = drm_gpusvm_range_start(&op_ctx.range->base) >> PAGE_SHIFT; + range_last_page = (drm_gpusvm_range_end(&op_ctx.range->base) >> PAGE_SHIFT) - 1; + partial = op_ctx.range->base.pages.flags.partial_unmap; + ret = 0; + + WARN_ON(!UNMAP_WORK(op_ctx.pending_ops)); + + down_write(&svm->svm_lock); + + if (partial) + amdgpu_svm_range_evict(svm, &op_ctx.range->base); + + amdgpu_svm_range_remove(svm, op_ctx.range, &ctx); + + /* Remove the range immediately to prevent the stale range from + * blocking the rebuild. + */ + drm_gpusvm_range_put(&op_ctx.range->base); + op_ctx.range = NULL; + + if (partial) { + /* Rebuild the still-valid area after a partial unmap, + * because gc always removes the entire range. + */ + ret = svm_restore_map_interval(svm, range_start_page, + range_last_page); + if (ret) + svm_restore_enqueue_unmapped( + svm, range_start_page, + range_last_page); + } + + up_write(&svm->svm_lock); + + spin_lock(&svm->work_lock); + } + spin_unlock(&svm->work_lock); + + if (atomic_read(&svm->restore.evicted_ranges)) + queue_delayed_work(svm->restore.wq, &svm->restore.work, + msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); +} + int amdgpu_svm_restore_init(struct amdgpu_svm *svm, void (*begin)(struct amdgpu_svm *), void (*end)(struct amdgpu_svm *)) -- 2.34.1
