From: Honglei Huang <[email protected]> Implement the core restore infrastructure and invalidation callback for xnack-off SVM. The invalidate callback handles MMU notifier events by quiescing compute queues when needed and scheduling restore work.
For MMU_NOTIFY_UNMAP events, avoid unconditionally quiescing compute queues -- only quiesce when a range is partially unmapped and has active GPU mappings (i.e. the GC worker will need a rebuild window). Full-range unmaps and ranges without GPU mappings skip the quiesce entirely, which reduces unnecessary queue preemption traffic. Non-UNMAP events (CLEAR/MIGRATE) always quiesce because PTEs are cleared synchronously and the restore is asynchronous. Signed-off-by: Honglei Huang <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c | 257 ++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c new file mode 100644 index 000000000..b231c7d44 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userptr.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2026 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_svm.h" +#include "amdgpu_userptr.h" +#include "amdgpu_svm_range.h" +#include "amdgpu_svm_attr.h" +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" + +#include <drm/drm_gpusvm.h> + +#include <uapi/linux/kfd_ioctl.h> + +#define AMDGPU_SVM_RESTORE_WQ_NAME "amdgpu_svm_restore" + +static void svm_restore_enqueue_work(struct amdgpu_svm *svm, + struct amdgpu_svm_range *range, + unsigned long start_page, + unsigned long last_page, + uint8_t pending_ops); +/* NOTE(review): not defined in this patch; confirm a later one adds it. */ +static void amdgpu_svm_restore_worker(struct work_struct *w); +/* Unmap the range pages; queue GC work on UNMAP, restore work otherwise. */ +static void +svm_restore_notifier_event_end(struct amdgpu_svm *svm, + struct drm_gpusvm_range *range, + const struct mmu_notifier_range *mmu_range) +{ + struct drm_gpusvm_ctx ctx = { .in_notifier = true, }; + struct amdgpu_svm_range *svm_range = to_amdgpu_svm_range(range); + unsigned long start_page = max(drm_gpusvm_range_start(&svm_range->base), + mmu_range->start) >> PAGE_SHIFT; + unsigned long last_page = (min(drm_gpusvm_range_end(&svm_range->base), + mmu_range->end) >> PAGE_SHIFT) - 1; + + amdgpu_svm_assert_in_notifier(svm); + /* [start_page, last_page] is the overlap of the range and the event. */ + drm_gpusvm_range_unmap_pages(&svm->gpusvm, range, &ctx); + if (mmu_range->event == MMU_NOTIFY_UNMAP) { + AMDGPU_SVM_RANGE_DEBUG(svm_range, "GARBAGE COLLECTOR ADD"); + drm_gpusvm_range_set_unmapped(&svm_range->base, mmu_range); + svm_restore_enqueue_work(svm, svm_range, start_page, last_page, + AMDGPU_SVM_RANGE_OP_UNMAP); + } else { + AMDGPU_SVM_RANGE_DEBUG(svm_range, "RESTORE WORKER ADD"); + svm_restore_enqueue_work(svm, svm_range, start_page, last_page, + AMDGPU_SVM_RANGE_OP_RESTORE); + } +} +/* Count an eviction; restore.begin() runs only when the count becomes 1. */ +static void svm_restore_eviction_begin(struct amdgpu_svm *svm) +{ + if (atomic_inc_return(&svm->restore.evicted_ranges) != 1) + 
return; + + svm->restore.begin(svm); +} +/* True when a gpu_mapped range extends beyond the span of @mmu_range. */ +static bool +svm_restore_unmap_needs_quiesce(struct drm_gpusvm_range *range, + const struct mmu_notifier_range *mmu_range) +{ + struct amdgpu_svm_range *svm_range = to_amdgpu_svm_range(range); + + if (!READ_ONCE(svm_range->gpu_mapped)) + return false; + + return drm_gpusvm_range_start(range) < mmu_range->start || + drm_gpusvm_range_end(range) > mmu_range->end; +} +/* Handle a notifier event: begin eviction if needed, queue work per range. */ +void +amdgpu_svm_restore_invalidate(struct amdgpu_svm *svm, + struct drm_gpusvm_notifier *notifier, + const struct mmu_notifier_range *mmu_range, + struct drm_gpusvm_range *first, + uint64_t adj_start, uint64_t adj_end) +{ + struct drm_gpusvm_range *r; + bool is_unmap = mmu_range->event == MMU_NOTIFY_UNMAP; + bool needs_flush = false; + + if (is_unmap) { + bool quiesced = false; + + amdgpu_svm_capture_checkpoint_ts(svm); + /* Begin eviction at most once for the whole event. */ + r = first; + drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) { + if (!quiesced && + svm_restore_unmap_needs_quiesce(r, mmu_range)) { + svm_restore_eviction_begin(svm); + quiesced = true; + } + + needs_flush |= amdgpu_svm_range_notifier_event_begin( + svm, r, mmu_range); + } + + if (needs_flush) + svm->flush_tlb(svm); + } else { + svm_restore_eviction_begin(svm); + + r = first; + drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) + amdgpu_svm_range_invalidate_gpu_mapping( + to_amdgpu_svm_range(r)); + } + /* Final pass: finish the event and queue work for each range. */ + r = first; + drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) + svm_restore_notifier_event_end(svm, r, mmu_range); +} +/* Under work_lock, fold span and op into @range; queue per queue_state. */ +static void +svm_restore_enqueue_work(struct amdgpu_svm *svm, + struct amdgpu_svm_range *range, + unsigned long start_page, + unsigned long last_page, + uint8_t pending_ops) +{ + bool queue_gc_work = false; + bool queue_restore_work = false; + + if (atomic_read(&svm->exiting)) + return; + + spin_lock(&svm->work_lock); + + /* Deny any work if range is unmapped */ + if (UNMAP_WORK(range->pending_ops)) { + spin_unlock(&svm->work_lock); + return; + } + + range->pending_start_page = 
min(range->pending_start_page, start_page); + range->pending_last_page = max(range->pending_last_page, last_page); + /* The pending span now also covers [start_page, last_page]. */ + if (UNMAP_WORK(pending_ops)) { + range->pending_ops = AMDGPU_SVM_RANGE_OP_UNMAP; + + switch (range->queue_state) { + case AMDGPU_SVM_RANGE_NOT_QUEUED: + drm_gpusvm_range_get(&range->base); + list_add_tail(&range->work_node, &svm->gc.list); + range->queue_state = AMDGPU_SVM_RANGE_IN_GC; + queue_gc_work = true; + break; + case AMDGPU_SVM_RANGE_IN_RESTORE: + /* Not being processed, so moving to gc is safe. */ + list_move_tail(&range->work_node, &svm->gc.list); + range->queue_state = AMDGPU_SVM_RANGE_IN_GC; + queue_gc_work = true; + break; + case AMDGPU_SVM_RANGE_PROCESSING: + /* Do not move range into gc, because if the range is being + * processed by the restore worker, moving it to GC may cause + * use-after-free: the GC worker would remove the range while + * the restore worker is still using it. + * Owner worker will see UNMAP in pending_ops by + * put_if_dequeued() and move to gc after processing the + * restore work. + */ + break; + case AMDGPU_SVM_RANGE_IN_GC: + break; + } + } else { + switch (range->queue_state) { + case AMDGPU_SVM_RANGE_NOT_QUEUED: + range->pending_ops |= AMDGPU_SVM_RANGE_OP_RESTORE; + drm_gpusvm_range_get(&range->base); + list_add_tail(&range->work_node, &svm->restore.list); + range->queue_state = AMDGPU_SVM_RANGE_IN_RESTORE; + queue_restore_work = true; + break; + case AMDGPU_SVM_RANGE_IN_RESTORE: + range->pending_ops |= AMDGPU_SVM_RANGE_OP_RESTORE; + break; + case AMDGPU_SVM_RANGE_PROCESSING: + /* + * Owner worker will see RESTORE in pending_ops + * via put_if_dequeued() and queue into restore worker. + */ + range->pending_ops |= AMDGPU_SVM_RANGE_OP_RESTORE; + break; + case AMDGPU_SVM_RANGE_IN_GC: + /* Should not get here: UNMAP denies all the work. 
*/ + break; + } + } + + spin_unlock(&svm->work_lock); + /* Queue the work items only after dropping work_lock. */ + if (queue_gc_work) + queue_work(svm->gc.wq, &svm->gc.work); + if (queue_restore_work) + queue_delayed_work(svm->restore.wq, &svm->restore.work, + msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); +} +/* Create the restore workqueue and reset state; returns 0 or -ENOMEM. */ +int amdgpu_svm_restore_init(struct amdgpu_svm *svm, + void (*begin)(struct amdgpu_svm *), + void (*end)(struct amdgpu_svm *)) +{ + svm->restore.wq = alloc_ordered_workqueue(AMDGPU_SVM_RESTORE_WQ_NAME, + WQ_HIGHPRI | WQ_MEM_RECLAIM); + if (!svm->restore.wq) + return -ENOMEM; + + svm->restore.begin = begin; + svm->restore.end = end; + atomic_set(&svm->restore.quiesced, 0); + atomic_set(&svm->restore.evicted_ranges, 0); + INIT_LIST_HEAD(&svm->restore.list); + INIT_DELAYED_WORK(&svm->restore.work, amdgpu_svm_restore_worker); + return 0; +} +/* Cancel restore work, clean its list, end any quiesce, destroy the wq. */ +void amdgpu_svm_restore_fini(struct amdgpu_svm *svm) +{ + cancel_delayed_work_sync(&svm->restore.work); + amdgpu_svm_clean_queue(svm, &svm->restore.list); + atomic_set(&svm->restore.evicted_ranges, 0); + if (atomic_read(&svm->restore.quiesced)) + svm->restore.end(svm); + destroy_workqueue(svm->restore.wq); + svm->restore.wq = NULL; +} +/* Flush the delayed restore work item. */ +void amdgpu_svm_restore_flush(struct amdgpu_svm *svm) +{ + flush_delayed_work(&svm->restore.work); +} -- 2.34.1
