> diff --git a/include/linux/interrupt_rc.h b/include/linux/interrupt_rc.h
> new file mode 100644
> index 000000000000..dd4444c61330
> --- /dev/null
> +++ b/include/linux/interrupt_rc.h
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * include/linux/interrupt_rc.h - refcounted local processor interrupt
> + * management.
> + *
> + * Since the implementation of this API currently depends on
> + * local_irq_save()/local_irq_restore(), we split this into it's own header to
> + * make it easier to include without hitting circular header dependencies.
> + */
> +
> +#ifndef __LINUX_INTERRUPT_RC_H
> +#define __LINUX_INTERRUPT_RC_H
> +
> +#include <linux/irqflags.h>
> +#include <asm/processor.h>
> +#ifdef CONFIG_SMP
> +#include <asm/smp.h>
> +#endif
> +
> +/* Per-cpu interrupt disabling state for local_interrupt_{disable,enable}() */
> +struct interrupt_disable_state {
> + unsigned long flags;
> +};
> +
> +DECLARE_PER_CPU(struct interrupt_disable_state, local_interrupt_disable_state);
> +
> +static inline void local_interrupt_disable(void)
> +{
> + unsigned long flags;
> + int new_count;
> +
> + WARN_ON_ONCE(in_nmi());
> +
> + new_count = hardirq_disable_enter();
> +
> + /* Interrupts can happen here, but it's OK, see __irq_exit_rcu(). */
> +
> + if ((new_count & HARDIRQ_DISABLE_MASK) == HARDIRQ_DISABLE_OFFSET) {
> + local_irq_save(flags);
> + raw_cpu_write(local_interrupt_disable_state.flags, flags);
> + }
> +}
> +
> +static inline void local_interrupt_enable(void)
> +{
> + int new_count;
> +
> + new_count = hardirq_disable_exit();
> +
> + if ((new_count & HARDIRQ_DISABLE_MASK) == 0) {
> + unsigned long flags;
> +
> + flags = raw_cpu_read(local_interrupt_disable_state.flags);
> + local_irq_restore(flags);
Could this lead to indefinite softirq starvation? When __irq_exit_rcu()
skips invoke_softirq() due to hardirq_disable_count() > 0, any scheduled
softirqs (timers, RCU callbacks, networking) remain pending. When the
outermost critical section ends and local_interrupt_enable() restores
hardware interrupts, it doesn't check local_softirq_pending() to flush
these deferred softirqs.
Will these pending softirqs stall until another unrelated hardware
interrupt fires? In low-activity periods, this could potentially cause
RCU stalls or network hangs. This concern was raised by the sashiko bot in
both v2 and v3 but remains unaddressed:
https://lore.kernel.org/all/[email protected]/
> + /*
> + * TODO: re-read preempt count can be avoided, but it needs
> + * should_resched() taking another parameter as the current
> + * preempt count
> + */
> +#ifdef CONFIG_PREEMPTION
> + if (should_resched(0))
> + __preempt_schedule();
Peter Zijlstra suggested in v2 replacing should_resched(0) with a direct
check of new_count for preemption. His suggestion was: "can't you simply
write: if (!new_count) __preempt_schedule();". The author acknowledged this
in discussion but hasn't implemented it in v3:
https://lore.kernel.org/all/[email protected]/
Is there a reason to keep the more complex should_resched(0) approach, or
would the direct check be more appropriate here?
> +#endif
> + }
> +}
> +
> +#endif /* !__LINUX_INTERRUPT_RC_H */
[ ... ]
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index 10af5ed859e7..d1ab1799794c 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -88,6 +88,9 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
> EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
> #endif
>
> +DEFINE_PER_CPU(struct interrupt_disable_state, local_interrupt_disable_state);
> +EXPORT_PER_CPU_SYMBOL_GPL(local_interrupt_disable_state);
> +
> DEFINE_PER_CPU(unsigned int, nmi_nesting);
>
> /*
> @@ -728,7 +731,16 @@ static inline void __irq_exit_rcu(void)
> #endif
> account_hardirq_exit(current);
> preempt_count_sub(HARDIRQ_OFFSET);
> - if (!in_interrupt() && local_softirq_pending()) {
> + /*
> + * Interrupts may happen between hardirq_disable_enter() and
> + * local_irq_save() in local_interrupt_disable(), if irq_exit() invokes
> + * softirq here, we may have a softirq handler calling
> + * local_interrupt_disable() but it won't disable the irq because
> + * hardirq disabling count is already 1, hence we need to prevent
> + * invoking softirq when a local_interrupt_disable() is ongoing.
> + */
> + if (!in_interrupt() && !hardirq_disable_count() &&
> + local_softirq_pending()) {
> /*
> * If we left hrtimers unarmed, make sure to arm them now,
> * before enabling interrupts to run SoftIRQ.
---
AI reviewed your patch. Please fix the bug or reply by email explaining why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26998319662