Re: [PATCH 0/3] warn and suppress irqflood
Hi Thomas, On 2021-03-02 13:15, Sai Prakash Ranjan wrote: Hi Thomas, On Wed, Oct 28, 2020 at 12:58:41PM +0100, Thomas Gleixner wrote: ... Something like the completly untested below should work independent of config options. Thanks, tglx --- include/linux/irqdesc.h |4 ++ kernel/irq/manage.c |3 + kernel/irq/spurious.c | 74 +++- 3 files changed, 61 insertions(+), 20 deletions(-) --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -30,6 +30,8 @@ struct pt_regs; * @tot_count: stats field for non-percpu irqs * @irq_count: stats field to detect stalled irqs * @last_unhandled:aging timer for unhandled count + * @storm_count: Counter for irq storm detection + * @storm_checked: Timestamp for irq storm detection * @irqs_unhandled:stats field for spurious unhandled interrupts * @threads_handled: stats field for deferred spurious detection of threaded handlers * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers @@ -65,6 +67,8 @@ struct irq_desc { unsigned inttot_count; unsigned intirq_count; /* For detecting broken IRQs */ unsigned long last_unhandled; /* Aging timer for unhandled count */ + unsigned long storm_count; + unsigned long storm_checked; unsigned intirqs_unhandled; atomic_tthreads_handled; int threads_handled_last; --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1581,6 +1581,9 @@ static int if (!shared) { init_waitqueue_head(>wait_for_threads); + /* Take a timestamp for interrupt storm detection */ + desc->storm_checked = jiffies; + /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); static int irq_poll_cpu; static atomic_t irq_poll_active; +static unsigned long irqstorm_limit __ro_after_init; /* * We wait here for a poller to finish. @@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu * (The other 100-of-100,000 interrupts may have been a correctly * functioning device sharing an IRQ with the failing one) */ -static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) +static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret, +bool storm) { unsigned int irq = irq_desc_get_irq(desc); struct irqaction *action; unsigned long flags; - if (bad_action_ret(action_ret)) { - printk(KERN_ERR "irq event %d: bogus return value %x\n", - irq, action_ret); - } else { - printk(KERN_ERR "irq %d: nobody cared (try booting with " + if (!storm) { + if (bad_action_ret(action_ret)) { + pr_err("irq event %d: bogus return value %x\n", + irq, action_ret); + } else { + pr_err("irq %d: nobody cared (try booting with " "the \"irqpoll\" option)\n", irq); + } } dump_stack(); printk(KERN_ERR "handlers:\n"); @@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de if (count > 0) { count--; - __report_bad_irq(desc, action_ret); + __report_bad_irq(desc, action_ret, false); } } @@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru return action && (action->flags & IRQF_IRQPOLL); } +static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t action_ret, + const char *reason, bool storm) +{ + __report_bad_irq(desc, action_ret, storm); + pr_emerg("Disabling %s IRQ #%d\n", reason, irq_desc_get_irq(desc)); + desc->istate |= IRQS_SPURIOUS_DISABLED; + desc->depth++; + irq_disable(desc); +} + +/* Interrupt storm detector for runaway interrupts (handled or not). */ +static bool irqstorm_detected(struct irq_desc *desc) +{ + unsigned long now = jiffies; + + if (++desc->storm_count < irqstorm_limit) { + if (time_after(now, desc->storm_checked + HZ)) { + desc->storm_count = 0; + desc->storm_checked = now; + } + return false; + } + + disable_stuck_irq(desc, IRQ_NONE, "runaway", true); + return true; +} + #define SPURIOUS_DEFERRED 0x8000 void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) @@
Re: [PATCH 0/3] warn and suppress irqflood
Hi Thomas, > On Wed, Oct 28, 2020 at 12:58:41PM +0100, Thomas Gleixner wrote: > ... > Something like the completly untested below should work independent of > config options. > > Thanks, > > tglx > --- > include/linux/irqdesc.h |4 ++ > kernel/irq/manage.c |3 + > kernel/irq/spurious.c | 74 > +++- > 3 files changed, 61 insertions(+), 20 deletions(-) > > --- a/include/linux/irqdesc.h > +++ b/include/linux/irqdesc.h > @@ -30,6 +30,8 @@ struct pt_regs; > * @tot_count: stats field for non-percpu irqs > * @irq_count: stats field to detect stalled irqs > * @last_unhandled: aging timer for unhandled count > + * @storm_count: Counter for irq storm detection > + * @storm_checked: Timestamp for irq storm detection > * @irqs_unhandled: stats field for spurious unhandled interrupts > * @threads_handled: stats field for deferred spurious detection of threaded > handlers > * @threads_handled_last: comparator field for deferred spurious detection > of theraded handlers > @@ -65,6 +67,8 @@ struct irq_desc { > unsigned inttot_count; > unsigned intirq_count; /* For detecting broken IRQs */ > unsigned long last_unhandled; /* Aging timer for unhandled > count */ > + unsigned long storm_count; > + unsigned long storm_checked; > unsigned intirqs_unhandled; > atomic_tthreads_handled; > int threads_handled_last; > --- a/kernel/irq/manage.c > +++ b/kernel/irq/manage.c > @@ -1581,6 +1581,9 @@ static int > if (!shared) { > init_waitqueue_head(>wait_for_threads); > > + /* Take a timestamp for interrupt storm detection */ > + desc->storm_checked = jiffies; > + > /* Setup the type (level, edge polarity) if configured: */ > if (new->flags & IRQF_TRIGGER_MASK) { > ret = __irq_set_trigger(desc, > --- a/kernel/irq/spurious.c > +++ b/kernel/irq/spurious.c > @@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti > static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); > static int irq_poll_cpu; > static atomic_t irq_poll_active; > +static unsigned long irqstorm_limit __ro_after_init; > > /* > * We wait here for a poller to finish. > @@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu > * (The other 100-of-100,000 interrupts may have been a correctly > * functioning device sharing an IRQ with the failing one) > */ > -static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) > +static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret, > + bool storm) > { > unsigned int irq = irq_desc_get_irq(desc); > struct irqaction *action; > unsigned long flags; > > - if (bad_action_ret(action_ret)) { > - printk(KERN_ERR "irq event %d: bogus return value %x\n", > - irq, action_ret); > - } else { > - printk(KERN_ERR "irq %d: nobody cared (try booting with " > + if (!storm) { > + if (bad_action_ret(action_ret)) { > + pr_err("irq event %d: bogus return value %x\n", > +irq, action_ret); > + } else { > + pr_err("irq %d: nobody cared (try booting with " > "the \"irqpoll\" option)\n", irq); > + } > } > dump_stack(); > printk(KERN_ERR "handlers:\n"); > @@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de > > if (count > 0) { > count--; > - __report_bad_irq(desc, action_ret); > + __report_bad_irq(desc, action_ret, false); > } > } > > @@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru > return action && (action->flags & IRQF_IRQPOLL); > } > > +static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t action_ret, > + const char *reason, bool storm) > +{ > + __report_bad_irq(desc, action_ret, storm); > + pr_emerg("Disabling %s IRQ #%d\n", reason, irq_desc_get_irq(desc)); > + desc->istate |= IRQS_SPURIOUS_DISABLED; > + desc->depth++; > + irq_disable(desc); > +} > + > +/* Interrupt storm detector for runaway interrupts (handled or not). */ > +static bool irqstorm_detected(struct irq_desc *desc) > +{ > + unsigned long now = jiffies; > + > + if (++desc->storm_count < irqstorm_limit) { > + if (time_after(now, desc->storm_checked + HZ)) { > + desc->storm_count = 0; > + desc->storm_checked = now; > + } > + return false; > + } > + > + disable_stuck_irq(desc, IRQ_NONE, "runaway", true); > + return true; > +} > + > #define SPURIOUS_DEFERRED
Re: [PATCH 0/3] warn and suppress irqflood
On Wed, Oct 28, 2020 at 7:58 PM Thomas Gleixner wrote: > [...] > --- > include/linux/irqdesc.h |4 ++ > kernel/irq/manage.c |3 + > kernel/irq/spurious.c | 74 > +++- > 3 files changed, 61 insertions(+), 20 deletions(-) > > --- a/include/linux/irqdesc.h > +++ b/include/linux/irqdesc.h > @@ -30,6 +30,8 @@ struct pt_regs; > * @tot_count: stats field for non-percpu irqs > * @irq_count: stats field to detect stalled irqs > * @last_unhandled:aging timer for unhandled count > + * @storm_count: Counter for irq storm detection > + * @storm_checked: Timestamp for irq storm detection > * @irqs_unhandled:stats field for spurious unhandled interrupts > * @threads_handled: stats field for deferred spurious detection of > threaded handlers > * @threads_handled_last: comparator field for deferred spurious detection > of theraded handlers > @@ -65,6 +67,8 @@ struct irq_desc { > unsigned inttot_count; > unsigned intirq_count; /* For detecting broken IRQs > */ > unsigned long last_unhandled; /* Aging timer for unhandled > count */ > + unsigned long storm_count; > + unsigned long storm_checked; > unsigned intirqs_unhandled; > atomic_tthreads_handled; > int threads_handled_last; > --- a/kernel/irq/manage.c > +++ b/kernel/irq/manage.c > @@ -1581,6 +1581,9 @@ static int > if (!shared) { > init_waitqueue_head(>wait_for_threads); > > + /* Take a timestamp for interrupt storm detection */ > + desc->storm_checked = jiffies; > + > /* Setup the type (level, edge polarity) if configured: */ > if (new->flags & IRQF_TRIGGER_MASK) { > ret = __irq_set_trigger(desc, > --- a/kernel/irq/spurious.c > +++ b/kernel/irq/spurious.c > @@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti > static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); > static int irq_poll_cpu; > static atomic_t irq_poll_active; > +static unsigned long irqstorm_limit __ro_after_init; > > /* > * We wait here for a poller to finish. > @@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu > * (The other 100-of-100,000 interrupts may have been a correctly > * functioning device sharing an IRQ with the failing one) > */ > -static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) > +static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret, > +bool storm) > { > unsigned int irq = irq_desc_get_irq(desc); > struct irqaction *action; > unsigned long flags; > > - if (bad_action_ret(action_ret)) { > - printk(KERN_ERR "irq event %d: bogus return value %x\n", > - irq, action_ret); > - } else { > - printk(KERN_ERR "irq %d: nobody cared (try booting with " > + if (!storm) { > + if (bad_action_ret(action_ret)) { > + pr_err("irq event %d: bogus return value %x\n", > + irq, action_ret); > + } else { > + pr_err("irq %d: nobody cared (try booting with " > "the \"irqpoll\" option)\n", irq); > + } > } > dump_stack(); > printk(KERN_ERR "handlers:\n"); > @@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de > > if (count > 0) { > count--; > - __report_bad_irq(desc, action_ret); > + __report_bad_irq(desc, action_ret, false); > } > } > > @@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru > return action && (action->flags & IRQF_IRQPOLL); > } > > +static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t action_ret, > + const char *reason, bool storm) > +{ > + __report_bad_irq(desc, action_ret, storm); > + pr_emerg("Disabling %s IRQ #%d\n", reason, irq_desc_get_irq(desc)); > + desc->istate |= IRQS_SPURIOUS_DISABLED; > + desc->depth++; > + irq_disable(desc); > +} > + > +/* Interrupt storm detector for runaway interrupts (handled or not). */ > +static bool irqstorm_detected(struct irq_desc *desc) > +{ > + unsigned long now = jiffies; > + > + if (++desc->storm_count < irqstorm_limit) { > + if (time_after(now, desc->storm_checked + HZ)) { > + desc->storm_count = 0; > + desc->storm_checked = now; > + } > + return false; > + } > + > + disable_stuck_irq(desc, IRQ_NONE, "runaway", true); > + return true; > +} > + > #define SPURIOUS_DEFERRED 0x8000 > > void note_interrupt(struct
Re: [PATCH 0/3] warn and suppress irqflood
On Wed, Oct 28, 2020 at 12:58:41PM +0100, Thomas Gleixner wrote: > On Wed, Oct 28 2020 at 14:02, Pingfan Liu wrote: > > On Tue, Oct 27, 2020 at 3:59 AM Thomas Gleixner wrote: > >> Also Liu's patch only works if: > >> > >> 1) CONFIG_IRQ_TIME_ACCOUNTING is enabled > > > > I wonder whether it can not be a default option or not by the following > > method: > > DEFINE_STATIC_KEY_FALSE(irqtime_account), and enable it according to > > a boot param. > > How so? > > config IRQ_TIME_ACCOUNTING > depends on HAVE_IRQ_TIME_ACCOUNTING && > !VIRT_CPU_ACCOUNTING_NATIVE > Look closely at the two config value: -1. HAVE_IRQ_TIME_ACCOUNTING, it is selected by most of the popular arches, and can be further relaxed. It implies sched_clock() is fast enough for sampling. With current code, the variable sched_clock_irqtime=0 can be used to turn off irqtime accounting on some arches with slow sched_clock(). And it can be even better by using DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime) So the pre-requirement can be relaxed as "depends on !VIRT_CPU_ACCOUNTING_NATIVE" In case that I can not express clearly, could you have a look at the demo patch? That patch _assumes_ that irqtime accounting costs much and is not turned on by default. If turned on, it will cost an extra jmp than current implement. And I think it is critical to my [1/3] whether this assumption is reasonable. -2. For VIRT_CPU_ACCOUNTING_NATIVE, it can only be selected by powerpc and ia64 In fact, I have a seperate patch for powerpc with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE to utilize my [1/3]. --- diff --git a/init/Kconfig b/init/Kconfig index c944691..16d168b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -490,7 +490,7 @@ endchoice config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" - depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE + depends on !VIRT_CPU_ACCOUNTING_NATIVE help Select this option to enable fine granularity task irq time accounting. This is done by reading a timestamp on each diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5a55d23..3ab7e1d 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -19,7 +19,7 @@ */ DEFINE_PER_CPU(struct irqtime, cpu_irqtime); -static int sched_clock_irqtime; +DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime); void enable_sched_clock_irqtime(void) { @@ -49,13 +49,14 @@ static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, */ void irqtime_account_irq(struct task_struct *curr) { - struct irqtime *irqtime = this_cpu_ptr(_irqtime); + struct irqtime *irqtime; s64 delta; int cpu; - if (!sched_clock_irqtime) + if (static_branch_unlikely(_clock_irqtime)) return; + irqtime = this_cpu_ptr(_irqtime); cpu = smp_processor_id(); delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; irqtime->irq_start_time += delta; @@ -84,16 +85,7 @@ static u64 irqtime_tick_accounted(u64 maxtime) return delta; } -#else /* CONFIG_IRQ_TIME_ACCOUNTING */ - -#define sched_clock_irqtime(0) - -static u64 irqtime_tick_accounted(u64 dummy) -{ - return 0; -} - -#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ +#endif static inline void task_group_account_field(struct task_struct *p, int index, u64 tmp) @@ -475,7 +467,7 @@ void account_process_tick(struct task_struct *p, int user_tick) if (vtime_accounting_enabled_this_cpu()) return; - if (sched_clock_irqtime) { + if (static_branch_unlikely(_clock_irqtime)) irqtime_account_process_tick(p, user_tick, 1); return; } @@ -504,7 +496,7 @@ void account_idle_ticks(unsigned long ticks) { u64 cputime, steal; - if (sched_clock_irqtime) { + if (static_branch_unlikely(_clock_irqtime)) irqtime_account_idle_ticks(ticks); return; } -- 2.7.5 [...] > + > +static int __init irqstorm_setup(char *arg) > +{ > + int res = kstrtoul(arg, 0, _limit); > + > + if (!res) { > + pr_info("Interrupt storm detector enabled. Limit=%lu / s\n", > + irqstorm_limit); > + } > + return !!res; > +} > +__setup("irqstorm_limit", irqstorm_setup); This configuration independent method looks appealing. And I am glad to have a try. But irqstorm_limit may be a hard choice. Maybe by formula: instruction-percpu-per-second / insn num of irq failed path ? It is hard to estimate "instruction-percpu-per-second". Thanks, Pingfan ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/3] warn and suppress irqflood
On Wed, Oct 28 2020 at 14:02, Pingfan Liu wrote: > On Tue, Oct 27, 2020 at 3:59 AM Thomas Gleixner wrote: >> Also Liu's patch only works if: >> >> 1) CONFIG_IRQ_TIME_ACCOUNTING is enabled > > I wonder whether it can not be a default option or not by the following > method: > DEFINE_STATIC_KEY_FALSE(irqtime_account), and enable it according to > a boot param. How so? config IRQ_TIME_ACCOUNTING depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE > This will have no impact on performance with the disabled branch. > Meanwhile users can easily turn on the option to detect an irq flood > without recompiling the kernel. > > If it is doable, I will rework only on [1/2]. See above :) >> 2) the runaway interrupt has been requested by the relevant driver in >> the dump kernel. > > Yes, it raises a big challenge to my method. Kdump kernel miss the > whole picture of the first kernel's irq routing. Correct. If there is anything stale then you get what Guilherme observed. But the irq core can do nothing about that. Something like the completly untested below should work independent of config options. Thanks, tglx --- include/linux/irqdesc.h |4 ++ kernel/irq/manage.c |3 + kernel/irq/spurious.c | 74 +++- 3 files changed, 61 insertions(+), 20 deletions(-) --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -30,6 +30,8 @@ struct pt_regs; * @tot_count: stats field for non-percpu irqs * @irq_count: stats field to detect stalled irqs * @last_unhandled:aging timer for unhandled count + * @storm_count: Counter for irq storm detection + * @storm_checked: Timestamp for irq storm detection * @irqs_unhandled:stats field for spurious unhandled interrupts * @threads_handled: stats field for deferred spurious detection of threaded handlers * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers @@ -65,6 +67,8 @@ struct irq_desc { unsigned inttot_count; unsigned intirq_count; /* For detecting broken IRQs */ unsigned long last_unhandled; /* Aging timer for unhandled count */ + unsigned long storm_count; + unsigned long storm_checked; unsigned intirqs_unhandled; atomic_tthreads_handled; int threads_handled_last; --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1581,6 +1581,9 @@ static int if (!shared) { init_waitqueue_head(>wait_for_threads); + /* Take a timestamp for interrupt storm detection */ + desc->storm_checked = jiffies; + /* Setup the type (level, edge polarity) if configured: */ if (new->flags & IRQF_TRIGGER_MASK) { ret = __irq_set_trigger(desc, --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs); static int irq_poll_cpu; static atomic_t irq_poll_active; +static unsigned long irqstorm_limit __ro_after_init; /* * We wait here for a poller to finish. @@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu * (The other 100-of-100,000 interrupts may have been a correctly * functioning device sharing an IRQ with the failing one) */ -static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) +static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret, +bool storm) { unsigned int irq = irq_desc_get_irq(desc); struct irqaction *action; unsigned long flags; - if (bad_action_ret(action_ret)) { - printk(KERN_ERR "irq event %d: bogus return value %x\n", - irq, action_ret); - } else { - printk(KERN_ERR "irq %d: nobody cared (try booting with " + if (!storm) { + if (bad_action_ret(action_ret)) { + pr_err("irq event %d: bogus return value %x\n", + irq, action_ret); + } else { + pr_err("irq %d: nobody cared (try booting with " "the \"irqpoll\" option)\n", irq); + } } dump_stack(); printk(KERN_ERR "handlers:\n"); @@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de if (count > 0) { count--; - __report_bad_irq(desc, action_ret); + __report_bad_irq(desc, action_ret, false); } } @@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru return action && (action->flags & IRQF_IRQPOLL); } +static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t action_ret, +
Re: [PATCH 0/3] warn and suppress irqflood
On Tue, Oct 27, 2020 at 3:59 AM Thomas Gleixner wrote: > [...] > > And contrary to Liu's patches which try to disable a requested interrupt > if too many of them arrive, the kernel cannot do anything because there > is nothing to disable in your case. That's why you needed to do the MSI > disable magic in the early PCI quirks which run before interrupts get > enabled. > > Also Liu's patch only works if: > > 1) CONFIG_IRQ_TIME_ACCOUNTING is enabled I wonder whether it can not be a default option or not by the following method: DEFINE_STATIC_KEY_FALSE(irqtime_account), and enable it according to a boot param. This will have no impact on performance with the disabled branch. Meanwhile users can easily turn on the option to detect an irq flood without recompiling the kernel. If it is doable, I will rework only on [1/2]. > > 2) the runaway interrupt has been requested by the relevant driver in > the dump kernel. Yes, it raises a big challenge to my method. Kdump kernel miss the whole picture of the first kernel's irq routing. Thanks, Pingfan ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/3] warn and suppress irqflood
On Mon, Oct 26 2020 at 17:28, Guilherme Piccoli wrote: > On Mon, Oct 26, 2020 at 4:59 PM Thomas Gleixner wrote: >> It gets flooded right at the point where the crash kernel enables >> interrupts in start_kernel(). At that point there is no device driver >> and no interupt requested. All you can see on the console for this is >> >> "common_interrupt: $VECTOR.$CPU No irq handler for vector" >> >> And contrary to Liu's patches which try to disable a requested interrupt >> if too many of them arrive, the kernel cannot do anything because there >> is nothing to disable in your case. That's why you needed to do the MSI >> disable magic in the early PCI quirks which run before interrupts get >> enabled. > > Wow, thank you very much for this great explanation (without a > reproducer) - it's nice to hear somebody that deeply understands the > code! And double thanks for CCing Bjorn. Understanding the code is only half of the picture. You need to understand how the hardware works or not :) > So, I don't want to hijack Liu's thread, but do you think it makes > sense to have my approach as a (debug) parameter to prevent such a > degenerate case? At least it makes sense to some extent even if it's incomplete. What bothers me is that it'd be x86 specific while the issue is pretty much architecture independent. I don't think that the APIC is special in that regard. Rogue MSIs should be able to bring down pretty much all architectures. > Or could we have something in core IRQ code to prevent irq flooding in > such scenarios, something "stronger" than disabling MSIs (APIC-level, > likely)? For your case? No. The APIC cannot be protected against rogue MSIs. The only cure is to disable interrupts or disable MSIs on all PCI[E] devices early on. Disabling interrupts is not so much of an option obviously :) Thanks, tglx ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/3] warn and suppress irqflood
On Mon, Oct 26, 2020 at 4:59 PM Thomas Gleixner wrote: > > On Mon, Oct 26 2020 at 12:06, Guilherme Piccoli wrote: > > On Sun, Oct 25, 2020 at 8:12 AM Pingfan Liu wrote: > > > > Some time ago (2 years) we faced a similar issue in x86-64, a hard to > > debug problem in kdump, that eventually was narrowed to a buggy NIC FW > > flooding IRQs in kdump kernel, and no messages showed (although kernel > > changed a lot since that time, today we might have better IRQ > > handling/warning). We tried an early-boot fix, by disabling MSIs (as > > per PCI spec) early in x86 boot, but it wasn't accepted - Bjorn asked > > pertinent questions that I couldn't respond (I lost the reproducer) > > [0]. > ... > > [0] lore.kernel.org/linux-pci/20181018183721.27467-1-gpicc...@canonical.com > > With that broken firmware the NIC continued to send MSI messages to the > vector/CPU which was assigned to it before the crash. But the crash > kernel has no interrupt descriptor for this vector installed. So Liu's > patches wont print anything simply because the interrupt core cannot > detect it. > > To answer Bjorns still open question about when the point X is: > > > https://lore.kernel.org/linux-pci/20181023170343.ga4...@bhelgaas-glaptop.roam.corp.google.com/ > > It gets flooded right at the point where the crash kernel enables > interrupts in start_kernel(). At that point there is no device driver > and no interupt requested. All you can see on the console for this is > > "common_interrupt: $VECTOR.$CPU No irq handler for vector" > > And contrary to Liu's patches which try to disable a requested interrupt > if too many of them arrive, the kernel cannot do anything because there > is nothing to disable in your case. That's why you needed to do the MSI > disable magic in the early PCI quirks which run before interrupts get > enabled. > > Also Liu's patch only works if: > > 1) CONFIG_IRQ_TIME_ACCOUNTING is enabled > > 2) the runaway interrupt has been requested by the relevant driver in > the dump kernel. > > Especially #1 is not a sensible restriction. > > Thanks, > > tglx Wow, thank you very much for this great explanation (without a reproducer) - it's nice to hear somebody that deeply understands the code! And double thanks for CCing Bjorn. So, I don't want to hijack Liu's thread, but do you think it makes sense to have my approach as a (debug) parameter to prevent such a degenerate case? Or could we have something in core IRQ code to prevent irq flooding in such scenarios, something "stronger" than disabling MSIs (APIC-level, likely)? Cheers, Guilherme ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/3] warn and suppress irqflood
On Mon, Oct 26 2020 at 12:06, Guilherme Piccoli wrote: > On Sun, Oct 25, 2020 at 8:12 AM Pingfan Liu wrote: > > Some time ago (2 years) we faced a similar issue in x86-64, a hard to > debug problem in kdump, that eventually was narrowed to a buggy NIC FW > flooding IRQs in kdump kernel, and no messages showed (although kernel > changed a lot since that time, today we might have better IRQ > handling/warning). We tried an early-boot fix, by disabling MSIs (as > per PCI spec) early in x86 boot, but it wasn't accepted - Bjorn asked > pertinent questions that I couldn't respond (I lost the reproducer) > [0]. ... > [0] lore.kernel.org/linux-pci/20181018183721.27467-1-gpicc...@canonical.com With that broken firmware the NIC continued to send MSI messages to the vector/CPU which was assigned to it before the crash. But the crash kernel has no interrupt descriptor for this vector installed. So Liu's patches wont print anything simply because the interrupt core cannot detect it. To answer Bjorns still open question about when the point X is: https://lore.kernel.org/linux-pci/20181023170343.ga4...@bhelgaas-glaptop.roam.corp.google.com/ It gets flooded right at the point where the crash kernel enables interrupts in start_kernel(). At that point there is no device driver and no interupt requested. All you can see on the console for this is "common_interrupt: $VECTOR.$CPU No irq handler for vector" And contrary to Liu's patches which try to disable a requested interrupt if too many of them arrive, the kernel cannot do anything because there is nothing to disable in your case. That's why you needed to do the MSI disable magic in the early PCI quirks which run before interrupts get enabled. Also Liu's patch only works if: 1) CONFIG_IRQ_TIME_ACCOUNTING is enabled 2) the runaway interrupt has been requested by the relevant driver in the dump kernel. Especially #1 is not a sensible restriction. Thanks, tglx ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/3] warn and suppress irqflood
On Sun, Oct 25, 2020 at 8:12 AM Pingfan Liu wrote: > > On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner wrote: > > > > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote: > > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 > > > platform. > > > When the bug happens, the kernel is totally occupies by irq. Currently, > > > there > > > may be nothing or just soft lockup warning showed in console. It is better > > > to warn users with irq flood info. > > > > > > In the kdump case, the kernel can move on by suppressing the irq flood. > > > > You're curing the symptom not the cause and the cure is just magic and > > can't work reliably. > Yeah, it is magic. But at least, it is better to printk something and > alarm users about what happens. With current code, it may show nothing > when system hangs. Thanks Pingfan and Thomas for the points - I'd like to have a mechanism in the kernel to warn users when an IRQ flood is potentially happening. Some time ago (2 years) we faced a similar issue in x86-64, a hard to debug problem in kdump, that eventually was narrowed to a buggy NIC FW flooding IRQs in kdump kernel, and no messages showed (although kernel changed a lot since that time, today we might have better IRQ handling/warning). We tried an early-boot fix, by disabling MSIs (as per PCI spec) early in x86 boot, but it wasn't accepted - Bjorn asked pertinent questions that I couldn't respond (I lost the reproducer) [0]. Cheers, Guilherme [0] lore.kernel.org/linux-pci/20181018183721.27467-1-gpicc...@canonical.com ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [Skiboot] [PATCH 0/3] warn and suppress irqflood
On Mon, Oct 26, 2020 at 12:11 AM Pingfan Liu wrote: > > On Sun, Oct 25, 2020 at 8:21 PM Oliver O'Halloran wrote: > > > > On Sun, Oct 25, 2020 at 10:22 PM Pingfan Liu wrote: > > > > > > On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner > > > wrote: > > > > > > > > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote: > > > > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 > > > > > platform. > > > > > When the bug happens, the kernel is totally occupies by irq. > > > > > Currently, there > > > > > may be nothing or just soft lockup warning showed in console. It is > > > > > better > > > > > to warn users with irq flood info. > > > > > > > > > > In the kdump case, the kernel can move on by suppressing the irq > > > > > flood. > > > > > > > > You're curing the symptom not the cause and the cure is just magic and > > > > can't work reliably. > > > Yeah, it is magic. But at least, it is better to printk something and > > > alarm users about what happens. With current code, it may show nothing > > > when system hangs. > > > > > > > > Where is that irq flood originated from and why is none of the > > > > mechanisms we have in place to shut it up working? > > > The bug originates from a driver tpm_i2c_nuvoton, which calls i2c-bus > > > driver (i2c-opal.c). After i2c_opal_send_request(), the bug is > > > triggered. > > > > > > But things are complicated by introducing a firmware layer: Skiboot. > > > This software layer hides the detail of manipulating the hardware from > > > Linux. > > > > > > I guess the software logic can not enter a sane state when kernel crashes. > > > > > > Cc Skiboot and ppc64 community to see whether anyone has idea about it. > > > > What system are you using? > > Here is the info, if not enough, I will get more. > Product Name : OpenPOWER Firmware > Product Version : open-power-SUPERMICRO-P9DSU-V1.16-20180531-imp > Product Extra : op-build-e4b3eb5 > Product Extra : skiboot-v6.0-p1da203b > Product Extra : hostboot-f911e5c-pda8239f > Product Extra : occ-77bb5e6-p623d1cd > Product Extra : linux-4.16.7-openpower2-pbc45895 > Product Extra : petitboot-v1.7.1-pf773c0d > Product Extra : machine-xml-218a77a Unfortunately I don't have a schematic for that one. > > There's an external interrupt pin which is supposed to be wired to the > > TPM. I think we bounce that interrupt to FW by default since the > > external interrupt is sometimes used for other system-specific > > purposes. Odds are FW doesn't know what to do with it so you > > effectively have an always-on LSI. I fixed a similar bug a while ago > > by having skiboot mask any interrupts it doesn't have a handler for, > > This sounds like the root cause. But here Skiboot should have handler, > otherwise the first kernel can not run smoothly. I don't know why the TPM interrupt is asserted. If the TPM driver is polling for a response it might clear the underlying condition as a side effect of it's normal operation. > Do you have any idea about an unexpected re-initialization introducing > an unsane stage? No idea, but those TPMs have a history of bricking themselves if you do anything slightly odd to them. It wouldn't surprise me if the re-probe can cause issues. > Thanks, > Pingfan ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [Skiboot] [PATCH 0/3] warn and suppress irqflood
On Sun, Oct 25, 2020 at 8:21 PM Oliver O'Halloran wrote: > > On Sun, Oct 25, 2020 at 10:22 PM Pingfan Liu wrote: > > > > On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner wrote: > > > > > > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote: > > > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 > > > > platform. > > > > When the bug happens, the kernel is totally occupies by irq. > > > > Currently, there > > > > may be nothing or just soft lockup warning showed in console. It is > > > > better > > > > to warn users with irq flood info. > > > > > > > > In the kdump case, the kernel can move on by suppressing the irq flood. > > > > > > You're curing the symptom not the cause and the cure is just magic and > > > can't work reliably. > > Yeah, it is magic. But at least, it is better to printk something and > > alarm users about what happens. With current code, it may show nothing > > when system hangs. > > > > > > Where is that irq flood originated from and why is none of the > > > mechanisms we have in place to shut it up working? > > The bug originates from a driver tpm_i2c_nuvoton, which calls i2c-bus > > driver (i2c-opal.c). After i2c_opal_send_request(), the bug is > > triggered. > > > > But things are complicated by introducing a firmware layer: Skiboot. > > This software layer hides the detail of manipulating the hardware from > > Linux. > > > > I guess the software logic can not enter a sane state when kernel crashes. > > > > Cc Skiboot and ppc64 community to see whether anyone has idea about it. > > What system are you using? Here is the info, if not enough, I will get more. Product Name : OpenPOWER Firmware Product Version : open-power-SUPERMICRO-P9DSU-V1.16-20180531-imp Product Extra : op-build-e4b3eb5 Product Extra : skiboot-v6.0-p1da203b Product Extra : hostboot-f911e5c-pda8239f Product Extra : occ-77bb5e6-p623d1cd Product Extra : linux-4.16.7-openpower2-pbc45895 Product Extra : petitboot-v1.7.1-pf773c0d Product Extra : machine-xml-218a77a > > There's an external interrupt pin which is supposed to be wired to the > TPM. I think we bounce that interrupt to FW by default since the > external interrupt is sometimes used for other system-specific > purposes. Odds are FW doesn't know what to do with it so you > effectively have an always-on LSI. I fixed a similar bug a while ago > by having skiboot mask any interrupts it doesn't have a handler for, This sounds like the root cause. But here Skiboot should have handler, otherwise the first kernel can not run smoothly. Do you have any idea about an unexpected re-initialization introducing an unsane stage? Thanks, Pingfan ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [Skiboot] [PATCH 0/3] warn and suppress irqflood
On Sun, Oct 25, 2020 at 10:22 PM Pingfan Liu wrote: > > On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner wrote: > > > > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote: > > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 > > > platform. > > > When the bug happens, the kernel is totally occupies by irq. Currently, > > > there > > > may be nothing or just soft lockup warning showed in console. It is better > > > to warn users with irq flood info. > > > > > > In the kdump case, the kernel can move on by suppressing the irq flood. > > > > You're curing the symptom not the cause and the cure is just magic and > > can't work reliably. > Yeah, it is magic. But at least, it is better to printk something and > alarm users about what happens. With current code, it may show nothing > when system hangs. > > > > Where is that irq flood originated from and why is none of the > > mechanisms we have in place to shut it up working? > The bug originates from a driver tpm_i2c_nuvoton, which calls i2c-bus > driver (i2c-opal.c). After i2c_opal_send_request(), the bug is > triggered. > > But things are complicated by introducing a firmware layer: Skiboot. > This software layer hides the detail of manipulating the hardware from > Linux. > > I guess the software logic can not enter a sane state when kernel crashes. > > Cc Skiboot and ppc64 community to see whether anyone has idea about it. What system are you using? There's an external interrupt pin which is supposed to be wired to the TPM. I think we bounce that interrupt to FW by default since the external interrupt is sometimes used for other system-specific purposes. Odds are FW doesn't know what to do with it so you effectively have an always-on LSI. I fixed a similar bug a while ago by having skiboot mask any interrupts it doesn't have a handler for, but I have no idea when that fix will land in a released FW build. Oh well. ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/3] warn and suppress irqflood
On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner wrote: > > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote: > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 > > platform. > > When the bug happens, the kernel is totally occupies by irq. Currently, > > there > > may be nothing or just soft lockup warning showed in console. It is better > > to warn users with irq flood info. > > > > In the kdump case, the kernel can move on by suppressing the irq flood. > > You're curing the symptom not the cause and the cure is just magic and > can't work reliably. Yeah, it is magic. But at least, it is better to printk something and alarm users about what happens. With current code, it may show nothing when system hangs. > > Where is that irq flood originated from and why is none of the > mechanisms we have in place to shut it up working? The bug originates from a driver tpm_i2c_nuvoton, which calls i2c-bus driver (i2c-opal.c). After i2c_opal_send_request(), the bug is triggered. But things are complicated by introducing a firmware layer: Skiboot. This software layer hides the detail of manipulating the hardware from Linux. I guess the software logic can not enter a sane state when kernel crashes. Cc Skiboot and ppc64 community to see whether anyone has idea about it. Thanks, Pingfan ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
Re: [PATCH 0/3] warn and suppress irqflood
On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote: > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 > platform. > When the bug happens, the kernel is totally occupies by irq. Currently, there > may be nothing or just soft lockup warning showed in console. It is better > to warn users with irq flood info. > > In the kdump case, the kernel can move on by suppressing the irq flood. You're curing the symptom not the cause and the cure is just magic and can't work reliably. Where is that irq flood originated from and why is none of the mechanisms we have in place to shut it up working? Thanks, tglx ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec
[PATCH 0/3] warn and suppress irqflood
I hit a irqflood bug on powerpc platform, and two years ago, on a x86 platform. When the bug happens, the kernel is totally occupies by irq. Currently, there may be nothing or just soft lockup warning showed in console. It is better to warn users with irq flood info. In the kdump case, the kernel can move on by suppressing the irq flood. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Jisheng Zhang Cc: Andrew Morton Cc: "Guilherme G. Piccoli" Cc: Petr Mladek Cc: Marc Zyngier Cc: Linus Walleij Cc: afzal mohammed Cc: Lina Iyer Cc: "Gustavo A. R. Silva" Cc: Maulik Shah Cc: Al Viro Cc: Jonathan Corbet Cc: Pawan Gupta Cc: Mike Kravetz Cc: Oliver Neukum To: linux-ker...@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: kexec@lists.infradead.org Pingfan Liu (3): kernel/watchdog: show irq percentage if irq floods kernel/watchdog: suppress max irq when irq floods Documentation: introduce a param "irqflood_suppress" Documentation/admin-guide/kernel-parameters.txt | 3 ++ include/linux/irq.h | 2 ++ kernel/irq/spurious.c | 32 + kernel/watchdog.c | 48 + 4 files changed, 85 insertions(+) -- 2.7.5 ___ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec