> + * amdgpu_vm_increment_process_counter - increment a process profiling > counter > + * > + * @vm: the VM to update > + * @stat_type: which counter to increment > + * > + * NOTE: This function is called from IRQ context in amdgpu_irq_dispatch(), > + * so we MUST use spin_lock_irqsave/spin_unlock_irqrestore to prevent > + * deadlock when an interrupt arrives while another context holds > status_lock. > + * > + * The deadlock scenario without IRQ-safe locking: > + * 1. Process context: amdgpu_vm_bo_del() acquires > spin_lock(&vm->status_lock) > + * 2. IRQ arrives on same CPU > + * 3. IRQ handler: amdgpu_vm_increment_process_counter() tries to acquire > + * spin_lock(&vm->status_lock) -> DEADLOCK > + */
I don't think there's any need for a comment this long on something as common as spin_lock_irqsave() - especially if we're going to be using it all over the place. If you must, a /* This function can be called from IRQ context */ after spin_lock_irqsave() would be more than sufficient. Similarly, I don't know if we need spin_lock_irqsave() all over the place. Many of these functions definitely cannot be called from an IRQ context. Third, this is for the dkms branch only; the function it modifies is not upstream. Not sure if that means this discussion should be taken off this public mailing list. Thanks, David ________________________________________ From: Martin, Andrew <[email protected]> Sent: Monday, May 4, 2026 6:09 PM To: [email protected] Cc: Francis, David; Martin, Andrew Subject: [PATCH] drm/amdkfd: Fix IRQ-unsafe spinlock This patch fixes multiple CPU hangs caused by the improper use of spin_lock() calls that are not IRQ safe. This allows the software driver to get into some sort of deadlock/race condition. It happens on kernels that have CONFIG_PROVE_LOCKING=y and/or CONFIG_PREEMPT (full preemption). 
Fixes: 4c131aa02bc4 ("drm/amdgpu: Add profiling counters in fdinfo") Signed-off-by: Andrew Martin <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 158 ++++++++++++++++--------- 1 file changed, 105 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8de9b7b3cc88..dc0fbf134fc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -165,14 +165,16 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) struct amdgpu_vm *vm = vm_bo->vm; struct amdgpu_bo *bo = vm_bo->bo; + unsigned long flags; + vm_bo->moved = true; amdgpu_vm_assert_locked(vm); - spin_lock(&vm_bo->vm->status_lock); + spin_lock_irqsave(&vm_bo->vm->status_lock, flags); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm->evicted); else list_move_tail(&vm_bo->vm_status, &vm->evicted); - spin_unlock(&vm_bo->vm->status_lock); + spin_unlock_irqrestore(&vm_bo->vm->status_lock, flags); } /** * amdgpu_vm_bo_moved - vm_bo is moved @@ -184,10 +186,12 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) { + unsigned long flags; + amdgpu_vm_assert_locked(vm_bo->vm); - spin_lock(&vm_bo->vm->status_lock); + spin_lock_irqsave(&vm_bo->vm->status_lock, flags); list_move(&vm_bo->vm_status, &vm_bo->vm->moved); - spin_unlock(&vm_bo->vm->status_lock); + spin_unlock_irqrestore(&vm_bo->vm->status_lock, flags); } /** @@ -200,10 +204,12 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) { + unsigned long flags; + amdgpu_vm_assert_locked(vm_bo->vm); - spin_lock(&vm_bo->vm->status_lock); + spin_lock_irqsave(&vm_bo->vm->status_lock, flags); list_move(&vm_bo->vm_status, &vm_bo->vm->idle); - spin_unlock(&vm_bo->vm->status_lock); + spin_unlock_irqrestore(&vm_bo->vm->status_lock, flags); vm_bo->moved = 
false; } @@ -217,9 +223,11 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) { - spin_lock(&vm_bo->vm->status_lock); + unsigned long flags; + + spin_lock_irqsave(&vm_bo->vm->status_lock, flags); list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); - spin_unlock(&vm_bo->vm->status_lock); + spin_unlock_irqrestore(&vm_bo->vm->status_lock, flags); } /** @@ -232,10 +240,12 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) { + unsigned long flags; + vm_bo->moved = true; - spin_lock(&vm_bo->vm->status_lock); + spin_lock_irqsave(&vm_bo->vm->status_lock, flags); list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user); - spin_unlock(&vm_bo->vm->status_lock); + spin_unlock_irqrestore(&vm_bo->vm->status_lock, flags); } /** @@ -248,11 +258,13 @@ static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) { + unsigned long flags; + amdgpu_vm_assert_locked(vm_bo->vm); if (vm_bo->bo->parent) { - spin_lock(&vm_bo->vm->status_lock); + spin_lock_irqsave(&vm_bo->vm->status_lock, flags); list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); - spin_unlock(&vm_bo->vm->status_lock); + spin_unlock_irqrestore(&vm_bo->vm->status_lock, flags); } else { amdgpu_vm_bo_idle(vm_bo); } @@ -268,10 +280,12 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) */ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) { + unsigned long flags; + amdgpu_vm_assert_locked(vm_bo->vm); - spin_lock(&vm_bo->vm->status_lock); + spin_lock_irqsave(&vm_bo->vm->status_lock, flags); list_move(&vm_bo->vm_status, &vm_bo->vm->done); - spin_unlock(&vm_bo->vm->status_lock); + spin_unlock_irqrestore(&vm_bo->vm->status_lock, flags); } /** @@ -284,10 +298,11 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) static void 
amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) { struct amdgpu_vm_bo_base *vm_bo, *tmp; + unsigned long flags; amdgpu_vm_assert_locked(vm); - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); list_splice_init(&vm->done, &vm->invalidated); list_for_each_entry(vm_bo, &vm->invalidated, vm_status) vm_bo->moved = true; @@ -301,7 +316,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) else if (bo->parent) list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); } - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); } /** @@ -320,8 +335,10 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); bool shared; + unsigned long flags; + dma_resv_assert_held(bo->tbo.base.resv); - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); if (base->shared != shared) { base->shared = shared; @@ -333,7 +350,7 @@ static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) vm->stats[bo_memtype].drm.private += size; } } - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); } /** @@ -404,10 +421,11 @@ void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base, struct ttm_resource *res, int sign) { struct amdgpu_vm *vm = base->vm; + unsigned long flags; - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); amdgpu_vm_update_stats_locked(base, res, sign); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); } /** @@ -428,15 +446,17 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = NULL; INIT_LIST_HEAD(&base->vm_status); + unsigned long flags; + if (!bo) return; base->next = bo->vm_bo; bo->vm_bo = base; - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); base->shared = 
drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); if (!amdgpu_vm_is_bo_always_valid(vm, bo)) return; @@ -492,28 +512,29 @@ int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec, struct list_head *prev = &vm->done; struct amdgpu_bo_va *bo_va; struct amdgpu_bo *bo; + unsigned long flags; int ret; /* We can only trust prev->next while holding the lock */ - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); while (!list_is_head(prev->next, &vm->done)) { bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status); bo = bo_va->base.bo; if (bo) { amdgpu_bo_ref(bo); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1); amdgpu_bo_unref(&bo); if (unlikely(ret)) return ret; - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); } prev = prev->next; } - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); return 0; } @@ -611,6 +632,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *bo; + unsigned long flags; int r; if (vm->generation != new_vm_generation) { @@ -622,12 +644,12 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); while (!list_empty(&vm->evicted)) { bo_base = list_first_entry(&vm->evicted, struct amdgpu_vm_bo_base, vm_status); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); bo = bo_base->bo; @@ -641,13 +663,13 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->update_funcs->map_table(to_amdgpu_bo_vm(bo)); 
amdgpu_vm_bo_relocated(bo_base); } - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); } while (ticket && !list_empty(&vm->evicted_user)) { bo_base = list_first_entry(&vm->evicted_user, struct amdgpu_vm_bo_base, vm_status); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); bo = bo_base->bo; dma_resv_assert_held(bo->tbo.base.resv); @@ -658,9 +680,9 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, amdgpu_vm_bo_invalidated(bo_base); - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); } - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); amdgpu_vm_eviction_lock(vm); vm->evicting = false; @@ -681,6 +703,7 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { + unsigned long flags; bool ret; amdgpu_vm_assert_locked(vm); @@ -689,9 +712,9 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) ret = !vm->evicting; amdgpu_vm_eviction_unlock(vm); - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); ret &= list_empty(&vm->evicted); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); spin_lock(&vm->immediate.lock); ret &= !vm->immediate.stopped; @@ -990,11 +1013,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, LIST_HEAD(relocated); int r, idx; + unsigned long flags; + amdgpu_vm_assert_locked(vm); - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); list_splice_init(&vm->relocated, &relocated); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); if (list_empty(&relocated)) return 0; @@ -1261,10 +1286,12 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM], struct amdgpu_process_stats *process_stats) { - spin_lock(&vm->status_lock); + unsigned long flags; + + spin_lock_irqsave(&vm->status_lock, flags); 
memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM); memcpy(process_stats, &vm->process_stats, sizeof(*process_stats)); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); } /** @@ -1634,26 +1661,27 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va; struct dma_resv *resv; bool clear, unlock; + unsigned long flags; int r; - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); while (!list_empty(&vm->moved)) { bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, base.vm_status); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); /* Per VM BOs never need to bo cleared in the page tables */ r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); } while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); resv = bo_va->base.bo->tbo.base.resv; - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); /* Try to reserve the BO to avoid clearing its ptes */ if (!adev->debug_vm && dma_resv_trylock(resv)) { @@ -1685,9 +1713,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) amdgpu_vm_bo_evicted_user(&bo_va->base); - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); } - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); return 0; } @@ -2225,9 +2253,13 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, } } - spin_lock(&vm->status_lock); - list_del(&bo_va->base.vm_status); - spin_unlock(&vm->status_lock); + { + unsigned long flags; + + spin_lock_irqsave(&vm->status_lock, flags); + list_del(&bo_va->base.vm_status); + spin_unlock_irqrestore(&vm->status_lock, flags); + } list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); 
@@ -2334,11 +2366,12 @@ void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem, for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; + unsigned long flags; - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1); amdgpu_vm_update_stats_locked(bo_base, new_mem, +1); - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); } amdgpu_vm_bo_invalidate(bo, evicted); @@ -3104,10 +3137,11 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) unsigned int total_invalidated_objs = 0; unsigned int total_done_objs = 0; unsigned int id = 0; + unsigned long flags; amdgpu_vm_assert_locked(vm); - spin_lock(&vm->status_lock); + spin_lock_irqsave(&vm->status_lock, flags); seq_puts(m, "\tIdle BOs:\n"); list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { if (!bo_va->base.bo) @@ -3159,7 +3193,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) continue; total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m); } - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); total_done_objs = id; seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle, @@ -3260,9 +3294,27 @@ void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev, adev->vm_manager.vm_pte_funcs = vm_pte_funcs; } +/** + * amdgpu_vm_increment_process_counter - increment a process profiling counter + * + * @vm: the VM to update + * @stat_type: which counter to increment + * + * NOTE: This function is called from IRQ context in amdgpu_irq_dispatch(), + * so we MUST use spin_lock_irqsave/spin_unlock_irqrestore to prevent + * deadlock when an interrupt arrives while another context holds status_lock. + * + * The deadlock scenario without IRQ-safe locking: + * 1. Process context: amdgpu_vm_bo_del() acquires spin_lock(&vm->status_lock) + * 2. 
IRQ arrives on same CPU + * 3. IRQ handler: amdgpu_vm_increment_process_counter() tries to acquire + * spin_lock(&vm->status_lock) -> DEADLOCK + */ void amdgpu_vm_increment_process_counter(struct amdgpu_vm *vm, enum amdgpu_process_stat_type stat_type) { - spin_lock(&vm->status_lock); + unsigned long flags; + + spin_lock_irqsave(&vm->status_lock, flags); switch (stat_type) { case AMDGPU_VM_VMFAULT_COUNTER: vm->process_stats.vmfault_counter++; @@ -3282,5 +3334,5 @@ void amdgpu_vm_increment_process_counter(struct amdgpu_vm *vm, enum amdgpu_proce default: pr_debug("unknown process stat type 0x%x\n", stat_type); } - spin_unlock(&vm->status_lock); + spin_unlock_irqrestore(&vm->status_lock, flags); } -- 2.43.0
