From: Honglei Huang <[email protected]>

Add MMU notifier event handling and garbage collection infrastructure:

- amdgpu_svm_range_remove: unmap pages and remove the range from gpusvm
- amdgpu_svm_range_notifier_event_begin: zap PTEs on notifier events
- amdgpu_svm_gc_enqueue: queue a range for deferred removal, tracking the
  pending page bounds
- amdgpu_svm_gc_add_range: mark a range unmapped and enqueue it for GC
- amdgpu_svm_range_notifier_event_end: DMA unmap and GC on MMU_NOTIFY_UNMAP
- amdgpu_svm_range_invalidate_interval: walk the notifiers/ranges in an
  interval, clear PTEs, and remove ranges that cross the interval boundary
- amdgpu_svm_range_dequeue_locked: dequeue a work item, transferring its
  pending state under work_lock
- range_try_dequeue: conditional dequeue for reference release
- amdgpu_svm_range_put_if_dequeued: drop the range reference after dequeue
  (see the sketch below)
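
The dequeue/put helpers are expected to pair up in the GC worker roughly as
in the following sketch (illustrative only; the worker function and the
amdgpu_svm_process_unmap() helper are hypothetical and not part of this
patch, only the dequeue/put helpers are):

  static void amdgpu_svm_gc_work_example(struct work_struct *work)
  {
  	struct amdgpu_svm *svm = container_of(work, struct amdgpu_svm, gc.work);
  	struct amdgpu_svm_range_op_ctx op_ctx;

  	for (;;) {
  		spin_lock(&svm->work_lock);
  		if (!amdgpu_svm_range_dequeue_locked(svm, &svm->gc.list, &op_ctx)) {
  			spin_unlock(&svm->work_lock);
  			break;
  		}
  		spin_unlock(&svm->work_lock);

  		/* Hypothetical helper: act on the pending page span captured
  		 * at enqueue time.
  		 */
  		if (op_ctx.pending_ops & AMDGPU_SVM_RANGE_OP_UNMAP)
  			amdgpu_svm_process_unmap(svm, op_ctx.range,
  						 op_ctx.start_page,
  						 op_ctx.last_page);

  		/* Drops the reference taken by amdgpu_svm_gc_enqueue() once
  		 * the range has no further pending work.
  		 */
  		amdgpu_svm_range_put_if_dequeued(svm, op_ctx.range);
  	}
  }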

Signed-off-by: Honglei Huang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c | 228 ++++++++++++++++++
 1 file changed, 228 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
index c733d611d..49240c704 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
@@ -486,3 +486,231 @@ amdgpu_svm_range_map_interval(struct amdgpu_svm *svm,
 	return 0;
 }
 
+void amdgpu_svm_range_remove(struct amdgpu_svm *svm,
+			     struct amdgpu_svm_range *range,
+			     struct drm_gpusvm_ctx *ctx)
+{
+	struct drm_gpusvm_range *base = &range->base;
+
+	amdgpu_svm_assert_locked(svm);
+
+	if (!base->pages.flags.unmapped && !base->pages.flags.partial_unmap)
+		drm_gpusvm_range_unmap_pages(&svm->gpusvm, base, ctx);
+
+	range_invalidate_gpu_mapping(range);
+	drm_gpusvm_range_remove(&svm->gpusvm, base);
+}
+
+static bool
+amdgpu_svm_range_notifier_event_begin(struct amdgpu_svm *svm,
+				      struct drm_gpusvm_range *range,
+				      const struct mmu_notifier_range *mmu_range)
+{
+	struct amdgpu_svm_range *svm_range = to_amdgpu_svm_range(range);
+
+	amdgpu_svm_assert_in_notifier(svm);
+
+	AMDGPU_SVM_RANGE_DEBUG(svm_range, "NOTIFIER");
+
+	if (range->pages.flags.unmapped || !svm_range->gpu_mapped)
+		return false;
+
+	AMDGPU_SVM_RANGE_DEBUG(svm_range, "NOTIFIER - EXECUTE");
+
+	amdgpu_svm_range_zap_ptes(svm, svm_range, mmu_range);
+	range_invalidate_gpu_mapping(svm_range);
+
+	return true;
+}
+
+static void
+amdgpu_svm_gc_enqueue(struct amdgpu_svm *svm,
+		      struct amdgpu_svm_range *range,
+		      unsigned long start_page, unsigned long last_page)
+{
+	if (atomic_read(&svm->exiting))
+		return;
+
+	spin_lock(&svm->work_lock);
+	if (!range->in_queue) {
+		drm_gpusvm_range_get(&range->base);
+		range->in_queue = true;
+	}
+
+	range->pending_start_page = min(range->pending_start_page, start_page);
+	range->pending_last_page = max(range->pending_last_page, last_page);
+	if (range->pending_ops == AMDGPU_SVM_RANGE_OP_NONE)
+		list_add_tail(&range->work_node, &svm->gc.list);
+	range->pending_ops |= AMDGPU_SVM_RANGE_OP_UNMAP;
+	spin_unlock(&svm->work_lock);
+
+	queue_work(svm->gc.wq, &svm->gc.work);
+}
+
+static void
+amdgpu_svm_gc_add_range(struct amdgpu_svm *svm,
+			struct amdgpu_svm_range *svm_range,
+			const struct mmu_notifier_range *mmu_range)
+{
+	unsigned long start_page = max(drm_gpusvm_range_start(&svm_range->base),
+				       mmu_range->start) >> PAGE_SHIFT;
+	unsigned long last_page = (min(drm_gpusvm_range_end(&svm_range->base),
+				       mmu_range->end) >> PAGE_SHIFT) - 1;
+
+	AMDGPU_SVM_RANGE_DEBUG(svm_range, "GARBAGE COLLECTOR ADD");
+
+	drm_gpusvm_range_set_unmapped(&svm_range->base, mmu_range);
+	amdgpu_svm_gc_enqueue(svm, svm_range, start_page, last_page);
+}
+
+static void
+amdgpu_svm_range_notifier_event_end(struct amdgpu_svm *svm,
+				    struct drm_gpusvm_range *range,
+				    const struct mmu_notifier_range *mmu_range)
+{
+	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
+
+	amdgpu_svm_assert_in_notifier(svm);
+
+	drm_gpusvm_range_unmap_pages(&svm->gpusvm, range, &ctx);
+	if (mmu_range->event == MMU_NOTIFY_UNMAP)
+		amdgpu_svm_gc_add_range(svm, to_amdgpu_svm_range(range),
+					mmu_range);
+}
+
+int
+amdgpu_svm_range_invalidate_interval(struct amdgpu_svm *svm,
+				     unsigned long start_page,
+				     unsigned long last_page)
+{
+	unsigned long start = start_page << PAGE_SHIFT;
+	unsigned long end = (last_page + 1) << PAGE_SHIFT;
+	struct drm_gpusvm_notifier *notifier, *next_notifier;
+	struct drm_gpusvm_ctx ctx = { .in_notifier = false };
+	struct drm_exec exec;
+	struct dma_fence *fence = NULL;
+	bool needs_flush = false;
+	unsigned int flags;
+	int ret;
+
+	amdgpu_svm_assert_locked(svm);
+
+	ret = amdgpu_svm_range_lock_vm_pd(svm, &exec, true);
+	if (ret)
+		return ret;
+
+	drm_gpusvm_for_each_notifier_safe(notifier, next_notifier, &svm->gpusvm,
+					  start, end) {
+		struct drm_gpusvm_range *range, *next_range;
+
+		drm_gpusvm_for_each_range_safe(range, next_range, notifier,
+					       start, end) {
+			struct amdgpu_svm_range *svm_range = to_amdgpu_svm_range(range);
+			unsigned long range_start = drm_gpusvm_range_start(range);
+			unsigned long range_end = drm_gpusvm_range_end(range);
+			unsigned long rs = range_start >> PAGE_SHIFT;
+			unsigned long rl = (range_end >> PAGE_SHIFT) - 1;
+			bool crosses_boundary = start > range_start || end < range_end;
+
+			if (svm_range->gpu_mapped) {
+				AMDGPU_SVM_RANGE_DEBUG(svm_range, crosses_boundary ? "ATTR DESTROY" :
+						       "ATTR ZAP PTE");
+
+				flags = memalloc_noreclaim_save();
+				ret = amdgpu_vm_update_range(svm->adev, svm->vm, false, true, true,
+							     false, NULL, rs, rl, 0, 0, 0, NULL, NULL, &fence);
+				memalloc_noreclaim_restore(flags);
+
+				if (!ret && fence) {
+					dma_fence_wait(fence, false);
+					dma_fence_put(fence);
+					fence = NULL;
+				}
+
+				if (ret) {
+					AMDGPU_SVM_TRACE(
+						"attr invalidate PTE clear failed: ret=%d [0x%lx-0x%lx]\n",
+						ret, rs, rl);
+					drm_exec_fini(&exec);
+					return ret;
+				}
+				needs_flush = true;
+			}
+
+			if (crosses_boundary) {
+				/* Remove ranges that cross the boundary so GPU
+				 * faults create new ranges bounded by the
+				 * updated attr_range boundaries.
+				 */
+				amdgpu_svm_range_remove(svm, svm_range, &ctx);
+			} else {
+				range_invalidate_gpu_mapping(svm_range);
+			}
+		}
+	}
+
+	drm_exec_fini(&exec);
+
+	if (needs_flush)
+		svm->flush_tlb(svm);
+
+	AMDGPU_SVM_TRACE("attr invalidate done [0x%lx-0x%lx]-0x%lx needs_flush=%d\n",
+			 start_page, last_page, last_page - start_page + 1,
+			 needs_flush ? 1 : 0);
+
+	return 0;
+}
+
+bool
+amdgpu_svm_range_dequeue_locked(struct amdgpu_svm *svm,
+				struct list_head *work_list,
+				struct amdgpu_svm_range_op_ctx *op_ctx)
+{
+	struct amdgpu_svm_range *range;
+
+	lockdep_assert_held(&svm->work_lock);
+
+	range = list_first_entry_or_null(work_list, struct amdgpu_svm_range,
+					 work_node);
+	if (!range)
+		return false;
+
+	list_del_init(&range->work_node);
+
+	op_ctx->range = range;
+	op_ctx->start_page = range->pending_start_page;
+	op_ctx->last_page = range->pending_last_page;
+	op_ctx->pending_ops = range->pending_ops;
+
+	range->pending_start_page = ULONG_MAX;
+	range->pending_last_page = 0;
+	range->pending_ops = AMDGPU_SVM_RANGE_OP_NONE;
+
+	return true;
+}
+
+static bool
+range_try_dequeue(struct amdgpu_svm_range *range)
+{
+	if (!range->in_queue)
+		return false;
+
+	if (range->pending_ops != AMDGPU_SVM_RANGE_OP_NONE)
+		return false;
+
+	range->in_queue = false;
+	return true;
+}
+
+void
+amdgpu_svm_range_put_if_dequeued(struct amdgpu_svm *svm,
+				 struct amdgpu_svm_range *range)
+{
+	bool dequeue;
+
+	spin_lock(&svm->work_lock);
+	dequeue = range_try_dequeue(range);
+	spin_unlock(&svm->work_lock);
+
+	if (dequeue)
+		drm_gpusvm_range_put(&range->base);
+}
-- 
2.34.1
