> diff --git a/include/linux/interrupt_rc.h b/include/linux/interrupt_rc.h
> new file mode 100644
> index 000000000000..dd4444c61330
> --- /dev/null
> +++ b/include/linux/interrupt_rc.h
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * include/linux/interrupt_rc.h - refcounted local processor interrupt
> + * management.
> + *
> + * Since the implementation of this API currently depends on
> + * local_irq_save()/local_irq_restore(), we split this into it's own header 
> to
> + * make it easier to include without hitting circular header dependencies.
> + */
> +
> +#ifndef __LINUX_INTERRUPT_RC_H
> +#define __LINUX_INTERRUPT_RC_H
> +
> +#include <linux/irqflags.h>
> +#include <asm/processor.h>
> +#ifdef CONFIG_SMP
> +#include <asm/smp.h>
> +#endif
> +
> +/* Per-cpu interrupt disabling state for local_interrupt_{disable,enable}() 
> */
> +struct interrupt_disable_state {
> +     unsigned long flags;
> +};
> +
> +DECLARE_PER_CPU(struct interrupt_disable_state, 
> local_interrupt_disable_state);
> +
> +static inline void local_interrupt_disable(void)
> +{
> +     unsigned long flags;
> +     int new_count;
> +
> +     WARN_ON_ONCE(in_nmi());
> +
> +     new_count = hardirq_disable_enter();
> +
> +     /* Interrupts can happen here, but it's OK, see __irq_exit_rcu(). */
> +
> +     if ((new_count & HARDIRQ_DISABLE_MASK) == HARDIRQ_DISABLE_OFFSET) {
> +             local_irq_save(flags);
> +             raw_cpu_write(local_interrupt_disable_state.flags, flags);
> +     }
> +}
> +
> +static inline void local_interrupt_enable(void)
> +{
> +     int new_count;
> +
> +     new_count = hardirq_disable_exit();
> +
> +     if ((new_count & HARDIRQ_DISABLE_MASK) == 0) {
> +             unsigned long flags;
> +
> +             flags = raw_cpu_read(local_interrupt_disable_state.flags);
> +             local_irq_restore(flags);

Could this lead to indefinite softirq starvation? When __irq_exit_rcu()
skips invoke_softirq() due to hardirq_disable_count() > 0, any scheduled
softirqs (timers, RCU callbacks, networking) remain pending. When the
outermost critical section ends and local_interrupt_enable() restores
hardware interrupts, it doesn't check local_softirq_pending() to flush
these deferred softirqs.

Will these pending softirqs stall until another unrelated hardware
interrupt fires? In low-activity periods, this could potentially cause
RCU stalls or network hangs. This concern was raised by the sashiko bot in
both v2 and v3 but remains unaddressed:

https://lore.kernel.org/all/[email protected]/

> +             /*
> +              * TODO: re-read preempt count can be avoided, but it needs
> +              * should_resched() taking another parameter as the current
> +              * preempt count
> +              */
> +#ifdef CONFIG_PREEMPTION
> +             if (should_resched(0))
> +                     __preempt_schedule();

Peter Zijlstra suggested in v2 replacing should_resched(0) with a direct
check of new_count for preemption. His suggestion was: "can't you simply
write: if (!new_count) __preempt_schedule();". The author acknowledged this
in the discussion but hasn't implemented it in v3:

https://lore.kernel.org/all/[email protected]/

Is there a reason to keep the more complex should_resched(0) approach, or
would the direct check be more appropriate here?

> +#endif
> +     }
> +}
> +
> +#endif /* !__LINUX_INTERRUPT_RC_H */

[ ... ]

> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index 10af5ed859e7..d1ab1799794c 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -88,6 +88,9 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
>  EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
>  #endif
>
> +DEFINE_PER_CPU(struct interrupt_disable_state, 
> local_interrupt_disable_state);
> +EXPORT_PER_CPU_SYMBOL_GPL(local_interrupt_disable_state);
> +
>  DEFINE_PER_CPU(unsigned int, nmi_nesting);
>
>  /*
> @@ -728,7 +731,16 @@ static inline void __irq_exit_rcu(void)
>  #endif
>       account_hardirq_exit(current);
>       preempt_count_sub(HARDIRQ_OFFSET);
> -     if (!in_interrupt() && local_softirq_pending()) {
> +     /*
> +      * Interrupts may happen between hardirq_disable_enter() and
> +      * local_irq_save() in local_interrupt_disable(), if irq_exit() invokes
> +      * softirq here, we may have a softirq handler calling
> +      * local_interrupt_disable() but it won't disable the irq because
> +      * hardirq disabling count is already 1, hence we need to prevent
> +      * invoking softirq when a local_interrupt_disable() is ongoing.
> +      */
> +     if (!in_interrupt() && !hardirq_disable_count() &&
> +         local_softirq_pending()) {
>               /*
>                * If we left hrtimers unarmed, make sure to arm them now,
>                * before enabling interrupts to run SoftIRQ.


---
AI reviewed your patch. Please fix the bug, or reply by email explaining why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26998319662

Reply via email to