The capture kernel should try its best to save the crash info. Normally, an irq flood is caused by some trivial devices, which have no impact on saving the vmcore.
Introduce a kernel parameter "irqflood_suppress" to enable suppression of an irq flood. Signed-off-by: Pingfan Liu <kernelf...@gmail.com> Cc: Thomas Gleixner <t...@linutronix.de> Cc: Peter Zijlstra <pet...@infradead.org> Cc: Jisheng Zhang <jisheng.zh...@synaptics.com> Cc: Andrew Morton <a...@linux-foundation.org> Cc: "Guilherme G. Piccoli" <gpicc...@canonical.com> Cc: Petr Mladek <pmla...@suse.com> Cc: Marc Zyngier <m...@kernel.org> Cc: Linus Walleij <linus.wall...@linaro.org> Cc: afzal mohammed <afzal.mohd...@gmail.com> Cc: Lina Iyer <il...@codeaurora.org> Cc: "Gustavo A. R. Silva" <gust...@embeddedor.com> Cc: Maulik Shah <mks...@codeaurora.org> Cc: Al Viro <v...@zeniv.linux.org.uk> Cc: Jonathan Corbet <cor...@lwn.net> Cc: Pawan Gupta <pawan.kumar.gu...@linux.intel.com> Cc: Mike Kravetz <mike.krav...@oracle.com> Cc: Oliver Neukum <oneu...@suse.com> To: linux-kernel@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: ke...@lists.infradead.org --- include/linux/irq.h | 2 ++ kernel/irq/spurious.c | 32 ++++++++++++++++++++++++++++++++ kernel/watchdog.c | 9 ++++++++- 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 1b7f4df..140cb61 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -684,6 +684,8 @@ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); /* Enable/disable irq debugging output: */ extern int noirqdebug_setup(char *str); +void suppress_max_irq(void); + /* Checks whether the interrupt can be requested by request_irq(): */ extern int can_request_irq(unsigned int irq, unsigned long irqflags); diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index f865e5f..d3d94d6 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -464,3 +464,35 @@ static int __init irqpoll_setup(char *str) } __setup("irqpoll", irqpoll_setup); + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + +static bool irqflood_suppress; + +static int __init irqflood_suppress_setup(char *str) +{ + irqflood_suppress = 
true; + pr_info("enable auto suppress irqflood\n"); + return 1; +} +__setup("irqflood_suppress", irqflood_suppress_setup); + +void suppress_max_irq(void) +{ + unsigned int tmp, maxirq = 0, max = 0; + int irq; + + if (!irqflood_suppress) + return; + for_each_active_irq(irq) { + tmp = kstat_irqs_cpu(irq, smp_processor_id()); + if (max < tmp) { + maxirq = irq; + max = tmp; + } + } + pr_warn("Suppress irq:%u, which is triggered %u times\n", + maxirq, max); + disable_irq_nosync(maxirq); +} +#endif diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 230ac38..28a74e5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -24,6 +24,7 @@ #include <linux/sched/isolation.h> #include <linux/stop_machine.h> #include <linux/kernel_stat.h> +#include <linux/irq.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> @@ -364,9 +365,15 @@ static void check_irq_flood(void) percent = irqts * 100 / totalts; percent = percent < 100 ? percent : 100; __this_cpu_write(check_hint, -1); - if (percent >= 98) + if (percent >= 98) { pr_info("Irq flood occupies more than %lu%% of the past %lu seconds\n", percent, totalts >> 30); + /* + * Suppress top irq when scheduler does not work for long time and irq + * occupies too much time. + */ + suppress_max_irq(); + } } else if (cnt == 0) { __this_cpu_write(last_total_ts, totalts); __this_cpu_write(last_irq_ts, irqts); -- 2.7.5