From: Honglei Huang <[email protected]>

- KFD queue quiesce/resume: reuse the existing KFD APIs
- GC worker: processes unmap events by clearing attributes and rebuilding the
  GPU mappings; re-queues a range onto the restore queue if the rebuild fails
- Restore worker: restores evicted ranges via an attribute-tree lookup
- Flush/sync helpers for an orderly shutdown

A stand-alone sketch of the per-range request coalescing used by these
workers is included below, after the sign-off.

Signed-off-by: Honglei Huang <[email protected]>
---
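Note for reviewers: the enqueue path coalesces repeated unmap/restore requests
per range into a single pending window (min start, max last) plus an op
bitmask, and the worker hands off the whole window in one go. The stand-alone
sketch below models only that coalescing; the struct and helper names are
hypothetical illustrations, not the driver's amdgpu_svm_range state.

/*
 * Minimal user-space model of the per-range pending-op coalescing.
 * All names here are illustrative stand-ins for the driver state.
 */
#include <limits.h>
#include <stdio.h>

#define PENDING_OP_NONE		0u
#define PENDING_OP_UNMAP	(1u << 0)
#define PENDING_OP_RESTORE	(1u << 1)

struct pending_state {
	unsigned long start;	/* smallest pending page index */
	unsigned long last;	/* largest pending page index */
	unsigned int ops;	/* bitmask of queued operations */
};

/* Coalesce a new request into the per-range pending window. */
static void pending_merge(struct pending_state *p, unsigned long start,
			  unsigned long last, unsigned int op)
{
	p->start = start < p->start ? start : p->start;
	p->last = last > p->last ? last : p->last;
	p->ops |= op;
}

/* Hand the accumulated work to a worker and reset the window. */
static struct pending_state pending_take(struct pending_state *p)
{
	struct pending_state out = *p;

	p->start = ULONG_MAX;
	p->last = 0;
	p->ops = PENDING_OP_NONE;
	return out;
}

int main(void)
{
	struct pending_state p = { .start = ULONG_MAX, .last = 0,
				   .ops = PENDING_OP_NONE };
	struct pending_state work;

	pending_merge(&p, 0x1000, 0x1fff, PENDING_OP_UNMAP);
	pending_merge(&p, 0x3000, 0x3fff, PENDING_OP_RESTORE);

	work = pending_take(&p);
	printf("work: start=0x%lx last=0x%lx ops=0x%x\n",
	       work.start, work.last, work.ops);
	return 0;
}

Resetting the window to { ULONG_MAX, 0 } makes start > last, which is how the
dequeue side tells an empty window apart from a pending one.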
 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c | 404 ++++++++++++++++++
 1 file changed, 404 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
index eba0a52be..472a641fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_range.c
@@ -114,6 +114,7 @@ range_pages_valid(struct amdgpu_svm *svm,
 	return drm_gpusvm_range_pages_valid(&svm->gpusvm, range);
 }
 
+
 static int
 amdgpu_svm_range_gpu_unmap_in_notifier(struct amdgpu_svm *svm,
 				       struct drm_gpusvm_range *range,
@@ -246,6 +247,59 @@ amdgpu_svm_range_attr_pte_flags(struct amdgpu_svm *svm,
 	return pte_flags;
 }
 
+/*
+ * POC/WA: reuse the KFD APIs for queue quiesce/resume.
+ * The KFD APIs work at process level, not at GPU VM level,
+ * so the implications of that mismatch still need to be considered.
+ */
+void amdgpu_svm_range_restore_begin_compute(struct amdgpu_svm *svm)
+{
+	int ret;
+
+	if (!svm->gpusvm.mm)
+		return;
+
+	if (atomic_cmpxchg(&svm->kfd_queues_quiesced, 0, 1) != 0)
+		return;
+
+	ret = kgd2kfd_quiesce_mm(svm->gpusvm.mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
+	if (ret == -ESRCH) {
+		AMDGPU_SVM_TRACE("kfd quiesce skipped, no KFD process\n");
+		atomic_set(&svm->kfd_queues_quiesced, 0);
+		return;
+	}
+
+	if (ret) {
+		AMDGPU_SVM_TRACE("kfd quiesce failed ret=%d\n", ret);
+		atomic_set(&svm->kfd_queues_quiesced, 0);
+		return;
+	}
+
+	AMDGPU_SVM_TRACE("kfd quiesce ret=%d\n", ret);
+}
+
+void amdgpu_svm_range_restore_end_compute(struct amdgpu_svm *svm)
+{
+	int ret;
+
+	if (atomic_cmpxchg(&svm->kfd_queues_quiesced, 1, 0) != 1)
+		return;
+
+	if (!svm->gpusvm.mm)
+		return;
+
+	ret = kgd2kfd_resume_mm(svm->gpusvm.mm);
+	if (ret == -ESRCH) {
+		AMDGPU_SVM_TRACE("kfd resume skipped, no KFD process\n");
+		return;
+	}
+
+	if (ret)
+		AMDGPU_SVM_TRACE("kfd resume failed ret=%d\n", ret);
+	else
+		AMDGPU_SVM_TRACE("kfd resume ret=%d\n", ret);
+}
+
 static int amdgpu_svm_range_lock_vm_pd(struct amdgpu_svm *svm, struct drm_exec *exec)
 {
 	int ret;
@@ -746,6 +800,169 @@ int amdgpu_svm_range_apply_attr_change(struct amdgpu_svm *svm,
 	return amdgpu_svm_range_map_interval(svm, start, last, new_attrs);
 }
 
+static bool
+range_dequeue_locked(struct amdgpu_svm *svm,
+		     struct list_head *work_list,
+		     bool restore_queue,
+		     struct range_pending_op_ctx *op_ctx)
+{
+	struct amdgpu_svm_range *range;
+
+	lockdep_assert_held(&svm->gc_lock);
+
+	range = list_first_entry_or_null(work_list, struct amdgpu_svm_range,
+					 gc_node);
+	if (!range)
+		return false;
+
+	list_del_init(&range->gc_node);
+	if (restore_queue)
+		range->restore_queued = false;
+	else
+		range->gc_queued = false;
+
+	op_ctx->range = range;
+	op_ctx->start = range->pending_start;
+	op_ctx->last = range->pending_last;
+	op_ctx->pending_ops = range->pending_ops;
+
+	range->pending_start = ULONG_MAX;
+	range->pending_last = 0;
+	range->pending_ops = AMDGPU_SVM_RANGE_PENDING_OP_NONE;
+
+	return true;
+}
+
+static void
+range_requeue_restore_locked(struct amdgpu_svm *svm,
+			     struct amdgpu_svm_range *range,
+			     unsigned long start,
+			     unsigned long last)
+{
+	lockdep_assert_held(&svm->gc_lock);
+
+	range->pending_start = min(range->pending_start, start);
+	range->pending_last = max(range->pending_last, last);
+	range->pending_ops |= AMDGPU_SVM_RANGE_PENDING_OP_RESTORE;
+
+	if (!range->gc_queued && !range->restore_queued) {
+		list_add_tail(&range->gc_node, &svm->restore_work_list);
+		range->restore_queued = true;
+	}
+}
+
+static bool
+range_try_dequeue(struct amdgpu_svm_range *range)
+{
+	if (!range->in_queue)
+		return false;
+
+	if (range->gc_queued || range->restore_queued ||
+	    range->pending_start <= range->pending_last ||
+	    range->pending_ops != AMDGPU_SVM_RANGE_PENDING_OP_NONE)
+		return false;
+
+	range->in_queue = false;
+	return true;
+}
+
+static void
+range_put_if_dequeued(struct amdgpu_svm *svm,
+		      struct amdgpu_svm_range *range)
+{
+	bool dequeue;
+
+	spin_lock(&svm->gc_lock);
+	dequeue = range_try_dequeue(range);
+	spin_unlock(&svm->gc_lock);
+
+	if (dequeue)
+		drm_gpusvm_range_put(&range->base);
+}
+
+static void
+amdgpu_svm_range_enqueue(struct amdgpu_svm *svm,
+			 struct amdgpu_svm_range *range,
+			 unsigned long start,
+			 unsigned long last,
+			 enum amdgpu_svm_range_queue_op op)
+{
+	bool queue_gc_work = false;
+	bool queue_restore_work = false;
+
+	if (atomic_read(&svm->exiting))
+		return;
+
+	spin_lock(&svm->gc_lock);
+	if (!range->in_queue) {
+		drm_gpusvm_range_get(&range->base);
+		range->in_queue = true;
+	}
+
+	range->pending_start = min(range->pending_start, start);
+	range->pending_last = max(range->pending_last, last);
+
+	switch (op) {
+	case AMDGPU_SVM_RANGE_OP_UNMAP:
+		range->pending_ops |= AMDGPU_SVM_RANGE_PENDING_OP_UNMAP;
+		if (NEED_REBUILD(svm))
+			range->pending_ops |= AMDGPU_SVM_RANGE_PENDING_OP_RESTORE;
+		break;
+	case AMDGPU_SVM_RANGE_OP_RESTORE:
+		range->pending_ops |= AMDGPU_SVM_RANGE_PENDING_OP_RESTORE;
+		break;
+	}
+
+	if (UNMAP_WORK(range->pending_ops)) {
+		if (range->restore_queued) {
+			list_move_tail(&range->gc_node, &svm->gc_list);
+			range->restore_queued = false;
+			range->gc_queued = true;
+		} else if (!range->gc_queued) {
+			list_add_tail(&range->gc_node, &svm->gc_list);
+			range->gc_queued = true;
+		}
+		queue_gc_work = true;
+	} else if (RESTORE_WORK(range->pending_ops)) {
+		if (!range->gc_queued && !range->restore_queued) {
+			list_add_tail(&range->gc_node, &svm->restore_work_list);
+			range->restore_queued = true;
+		}
+		queue_restore_work = true;
+	}
+
+	spin_unlock(&svm->gc_lock);
+
+	if (queue_gc_work)
+		queue_work(svm->gc_wq, &svm->gc_work);
+	if (queue_restore_work)
+		queue_delayed_work(svm->restore_wq, &svm->restore_work,
				   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+}
+
+static int
+amdgpu_svm_range_process_unmap_interval(struct amdgpu_svm *svm,
+					unsigned long start, unsigned long last,
+					bool rebuild)
+{
+	int ret = 0;
+
+	down_write(&svm->svm_lock);
+	/* clear the attributes */
+	amdgpu_svm_attr_clear_pages(svm->attr_tree, start, last);
+
+	/* rebuild the mappings if needed */
+	if (amdgpu_svm_range_interval_has_range(svm, start, last))
+		ret = amdgpu_svm_range_rebuild_locked(svm, start, last, rebuild);
+
+	up_write(&svm->svm_lock);
+
+	AMDGPU_SVM_TRACE("work=UNMAP ret=%d start=0x%lx last=0x%lx rebuild=%d\n",
+			 ret, start, last, rebuild ? 1 : 0);
+
+	return ret;
+}
+
 static void amdgpu_svm_range_begin_restore(struct amdgpu_svm *svm)
 {
 	if (atomic_inc_return(&svm->evicted_ranges) != 1)
@@ -754,6 +971,121 @@ static void amdgpu_svm_range_begin_restore(struct amdgpu_svm *svm)
 	svm->begin_restore(svm);
 }
 
+static void amdgpu_svm_range_restore_worker(struct work_struct *w)
+{
+	struct delayed_work *dwork = to_delayed_work(w);
+	struct amdgpu_svm *svm = container_of(dwork, struct amdgpu_svm, restore_work);
+	unsigned long resched_delay =
+		max_t(unsigned long, 1,
+		      msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+	struct range_pending_op_ctx op_ctx;
+	int evicted_record;
+	bool need_resched = false;
+	bool has_pending;
+	int ret;
+
+	if (atomic_read(&svm->exiting))
+		return;
+
+	evicted_record = atomic_read(&svm->evicted_ranges);
+	if (!evicted_record)
+		return;
+
+	if (!svm->gpusvm.mm) {
+		atomic_set(&svm->evicted_ranges, 0);
+		svm->end_restore(svm);
+		return;
+	}
+
+	spin_lock(&svm->gc_lock);
+	while (range_dequeue_locked(svm, &svm->restore_work_list,
+				    true, &op_ctx)) {
+		spin_unlock(&svm->gc_lock);
+
+		down_write(&svm->svm_lock);
+		ret = amdgpu_svm_range_map_attr_ranges(svm, op_ctx.start,
+						       op_ctx.last);
+		up_write(&svm->svm_lock);
+
+		if (ret) {
+			AMDGPU_SVM_TRACE("restore work retry ret=%d start=0x%lx last=0x%lx\n",
+					 ret, op_ctx.start, op_ctx.last);
+			spin_lock(&svm->gc_lock);
+			range_requeue_restore_locked(svm, op_ctx.range,
+						     op_ctx.start, op_ctx.last);
+			spin_unlock(&svm->gc_lock);
+			need_resched = true;
+		}
+
+		range_put_if_dequeued(svm, op_ctx.range);
+		spin_lock(&svm->gc_lock);
+	}
+	spin_unlock(&svm->gc_lock);
+
+	spin_lock(&svm->gc_lock);
+	has_pending = !list_empty(&svm->restore_work_list) ||
+		      !list_empty(&svm->gc_list);
+	spin_unlock(&svm->gc_lock);
+
+	if (!need_resched && !has_pending) {
+
+		drm_gpusvm_notifier_lock(&svm->gpusvm);
+		spin_lock(&svm->gc_lock);
+
+		has_pending = !list_empty(&svm->restore_work_list) || !list_empty(&svm->gc_list);
+
+		spin_unlock(&svm->gc_lock);
+
+		if (!has_pending &&
+		    atomic_cmpxchg(&svm->evicted_ranges, evicted_record, 0) == evicted_record) {
+
+			drm_gpusvm_notifier_unlock(&svm->gpusvm);
+			svm->end_restore(svm);
+			return;
+
+		}
+		drm_gpusvm_notifier_unlock(&svm->gpusvm);
+	}
+
+	queue_delayed_work(svm->restore_wq, &svm->restore_work, resched_delay);
+}
+
+static void amdgpu_svm_range_gc_worker(struct work_struct *w)
+{
+	struct amdgpu_svm *svm = container_of(w, struct amdgpu_svm, gc_work);
+	struct range_pending_op_ctx op_ctx;
+
+	spin_lock(&svm->gc_lock);
+	while (range_dequeue_locked(svm, &svm->gc_list,
+				    false, &op_ctx)) {
+		int ret = 0;
+
+		spin_unlock(&svm->gc_lock);
+
+		if (UNMAP_WORK(op_ctx.pending_ops))
+			ret = amdgpu_svm_range_process_unmap_interval(svm,
+					op_ctx.start, op_ctx.last,
+					NEED_REBUILD(svm));
+
+		if (RESTORE_WORK(op_ctx.pending_ops)) {
+			/* if the rebuild succeeded just kick the restore work, otherwise re-enqueue for restore */
+			if (NEED_REBUILD(svm) && !ret)
+				queue_delayed_work(svm->restore_wq,
						   &svm->restore_work,
						   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+			else
+				amdgpu_svm_range_enqueue(svm, op_ctx.range,
							 op_ctx.start,
							 op_ctx.last,
							 AMDGPU_SVM_RANGE_OP_RESTORE);
+		}
+
+		range_put_if_dequeued(svm, op_ctx.range);
+		spin_lock(&svm->gc_lock);
+	}
+	spin_unlock(&svm->gc_lock);
+}
+
 void amdgpu_svm_range_invalidate(struct amdgpu_svm *svm,
				 struct drm_gpusvm_notifier *notifier,
				 const struct mmu_notifier_range *mmu_range)
@@ -790,3 +1122,75 @@ void amdgpu_svm_range_invalidate(struct amdgpu_svm *svm,
 	amdgpu_svm_range_process_notifier_ranges(svm, notifier, mmu_range, op,
						  queue_op);
 }
+
+int amdgpu_svm_range_work_init(struct amdgpu_svm *svm)
+{
+	svm->gc_wq = alloc_workqueue(AMDGPU_SVM_RANGE_WQ_NAME,
+				     WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
+	if (!svm->gc_wq)
+		return -ENOMEM;
+
+	svm->restore_wq = alloc_ordered_workqueue(AMDGPU_SVM_RESTORE_WQ_NAME,
+						  WQ_HIGHPRI | WQ_MEM_RECLAIM);
+	if (!svm->restore_wq) {
+		destroy_workqueue(svm->gc_wq);
+		svm->gc_wq = NULL;
+		return -ENOMEM;
+	}
+
+	init_rwsem(&svm->svm_lock);
+	spin_lock_init(&svm->gc_lock);
+	INIT_LIST_HEAD(&svm->gc_list);
+	INIT_LIST_HEAD(&svm->restore_work_list);
+	INIT_WORK(&svm->gc_work, amdgpu_svm_range_gc_worker);
+	INIT_DELAYED_WORK(&svm->restore_work, amdgpu_svm_range_restore_worker);
+
+	return 0;
+}
+
+void amdgpu_svm_range_flush(struct amdgpu_svm *svm)
+{
+	flush_work(&svm->gc_work);
+	flush_delayed_work(&svm->restore_work);
+	flush_work(&svm->gc_work);
+}
+
+void amdgpu_svm_range_sync_work(struct amdgpu_svm *svm)
+{
+	amdgpu_svm_range_flush(svm);
+	flush_workqueue(svm->gc_wq);
+	flush_workqueue(svm->restore_wq);
+}
+
+static void
+amdgpu_svm_range_clean_queue(struct amdgpu_svm *svm,
+			     struct list_head *work_list,
+			     bool restore_queue)
+{
+	struct range_pending_op_ctx op_ctx;
+
+	spin_lock(&svm->gc_lock);
+	while (range_dequeue_locked(svm, work_list,
+				    restore_queue, &op_ctx)) {
+		spin_unlock(&svm->gc_lock);
+		range_put_if_dequeued(svm, op_ctx.range);
+		spin_lock(&svm->gc_lock);
+	}
+	spin_unlock(&svm->gc_lock);
+}
+
+void amdgpu_svm_range_work_fini(struct amdgpu_svm *svm)
+{
+	cancel_delayed_work_sync(&svm->restore_work);
+	flush_work(&svm->gc_work);
+	amdgpu_svm_range_clean_queue(svm, &svm->gc_list, false);
+	amdgpu_svm_range_clean_queue(svm, &svm->restore_work_list, true);
+	atomic_set(&svm->evicted_ranges, 0);
+	if (atomic_read(&svm->kfd_queues_quiesced))
+		svm->end_restore(svm);
+
+	destroy_workqueue(svm->restore_wq);
+	svm->restore_wq = NULL;
+	destroy_workqueue(svm->gc_wq);
+	svm->gc_wq = NULL;
+}
-- 
2.34.1
