> diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
> index c091f3232cc5..03d37f72731a 100644
> --- a/kernel/bpf/bpf_lru_list.c
> +++ b/kernel/bpf/bpf_lru_list.c
[ ... ]
> @@ -451,13 +455,12 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
>
> loc_l = per_cpu_ptr(clru->local_list, cpu);
>
> - raw_spin_lock_irqsave(&loc_l->lock, flags);
> + if (!raw_spin_trylock_irqsave(&loc_l->lock, flags))
> + return NULL;
>
> node = __local_list_pop_free(loc_l);
> - if (!node) {
> - bpf_lru_list_pop_free_to_local(lru, loc_l);
> + if (!node && bpf_lru_list_pop_free_to_local(lru, loc_l))
> node = __local_list_pop_free(loc_l);
> - }
>
> if (node)
> __local_list_add_pending(lru, loc_l, cpu, node, hash);
The trylock conversion here looks correct, but what about the steal path
that follows later in bpf_common_lru_pop_free()? Looking at the existing
code (not shown in this diff), the steal loop still takes each remote
CPU's lock with an unconditional raw_spin_lock_irqsave():
do {
steal_loc_l = per_cpu_ptr(clru->local_list, steal);
raw_spin_lock_irqsave(&steal_loc_l->lock, flags);
...
} while (...);
If the goal is to avoid NMI-context deadlocks when acquiring LRU locks,
can the same deadlock occur here: an NMI arrives while the interrupted
context holds a steal_loc_l->lock, and the NMI handler's own pop path
then reaches the unconditional raw_spin_lock_irqsave() on that same lock?
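For illustration only, a trylock-based variant of that loop could skip any
CPU whose lock cannot be taken and move on to the next candidate. The sketch
below keeps the structure of the existing loop, with the elided body and
loop condition unchanged from the current code (not a tested patch):
	do {
		steal_loc_l = per_cpu_ptr(clru->local_list, steal);

		/* Skip this CPU rather than spin: its lock may be held
		 * by the very context this (NMI) caller interrupted.
		 */
		if (raw_spin_trylock_irqsave(&steal_loc_l->lock, flags)) {
			node = __local_list_pop_free(steal_loc_l);
			...
			raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
		}
		...
	} while (...);
Whether silently skipping a contended CPU is acceptable for the steal
heuristic is a separate question, but it would keep the steal path
consistent with the trylock conversion above.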
Similarly, after a successful steal, there is another unconditional lock
acquisition on loc_l->lock:
if (node) {
raw_spin_lock_irqsave(&loc_l->lock, flags);
__local_list_add_pending(lru, loc_l, cpu, node, hash);
raw_spin_unlock_irqrestore(&loc_l->lock, flags);
}
Should these also use trylock to maintain consistency with the stated goal
of avoiding NMI-context deadlocks?
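To make the deadlock mechanism concrete, here is a minimal userspace
analogue (my own illustration, not kernel code): a signal handler stands in
for the NMI and a C11 atomic_flag stands in for the raw spinlock. The
interrupted context already holds the lock, so an unconditional spin in the
handler could never succeed, whereas a trylock lets the handler notice the
situation and back off:

#include <signal.h>
#include <stdatomic.h>
#include <unistd.h>

static atomic_flag lock = ATOMIC_FLAG_INIT;

static void nmi_like_handler(int sig)
{
	(void)sig;

	/* An unconditional spin here would never finish: the lock owner
	 * is the very context this handler interrupted, and it cannot
	 * run again until the handler returns.
	 */
	if (!atomic_flag_test_and_set(&lock)) {
		/* got the lock: do the handler's work, then release it */
		atomic_flag_clear(&lock);
	} else {
		/* trylock failed: back off instead of deadlocking */
		write(STDERR_FILENO, "trylock failed, backing off\n", 28);
	}
}

int main(void)
{
	signal(SIGUSR1, nmi_like_handler);

	while (atomic_flag_test_and_set(&lock))	/* task context takes the lock */
		;
	raise(SIGUSR1);				/* "NMI" arrives while it is held */
	atomic_flag_clear(&lock);

	return 0;
}

The same reasoning applies to every lock reachable from the NMI path, which
is why the steal-loop and post-steal acquisitions above seem worth
converting as well.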
---
AI reviewed your patch. Please fix the bug or reply by email explaining why it is not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/21147913717