Re: [PATCH 0/3] warn and suppress irqflood

2021-06-04 Thread Sai Prakash Ranjan

Hi Thomas,

On 2021-03-02 13:15, Sai Prakash Ranjan wrote:

Hi Thomas,


On Wed, Oct 28, 2020 at 12:58:41PM +0100, Thomas Gleixner wrote:



...


Something like the completly untested below should work independent of
config options.

Thanks,

tglx
---
 include/linux/irqdesc.h |4 ++
 kernel/irq/manage.c |3 +
 kernel/irq/spurious.c   |   74 
+++-

 3 files changed, 61 insertions(+), 20 deletions(-)

--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -30,6 +30,8 @@ struct pt_regs;
  * @tot_count: stats field for non-percpu irqs
  * @irq_count: stats field to detect stalled irqs
  * @last_unhandled:aging timer for unhandled count
+ * @storm_count:   Counter for irq storm detection
+ * @storm_checked: Timestamp for irq storm detection
  * @irqs_unhandled:stats field for spurious unhandled interrupts
  * @threads_handled:	stats field for deferred spurious detection of 
threaded handlers
  * @threads_handled_last: comparator field for deferred spurious 
detection of theraded handlers

@@ -65,6 +67,8 @@ struct irq_desc {
unsigned inttot_count;
unsigned intirq_count;  /* For detecting broken IRQs */
unsigned long   last_unhandled; /* Aging timer for unhandled 
count */
+   unsigned long   storm_count;
+   unsigned long   storm_checked;
unsigned intirqs_unhandled;
atomic_tthreads_handled;
int threads_handled_last;
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1581,6 +1581,9 @@ static int
if (!shared) {
init_waitqueue_head(>wait_for_threads);

+   /* Take a timestamp for interrupt storm detection */
+   desc->storm_checked = jiffies;
+
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
ret = __irq_set_trigger(desc,
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti
 static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
 static int irq_poll_cpu;
 static atomic_t irq_poll_active;
+static unsigned long irqstorm_limit __ro_after_init;

 /*
  * We wait here for a poller to finish.
@@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu
  * (The other 100-of-100,000 interrupts may have been a correctly
  *  functioning device sharing an IRQ with the failing one)
  */
-static void __report_bad_irq(struct irq_desc *desc, irqreturn_t 
action_ret)
+static void __report_bad_irq(struct irq_desc *desc, irqreturn_t 
action_ret,

+bool storm)
 {
unsigned int irq = irq_desc_get_irq(desc);
struct irqaction *action;
unsigned long flags;

-   if (bad_action_ret(action_ret)) {
-   printk(KERN_ERR "irq event %d: bogus return value %x\n",
-   irq, action_ret);
-   } else {
-   printk(KERN_ERR "irq %d: nobody cared (try booting with "
+   if (!storm) {
+   if (bad_action_ret(action_ret)) {
+   pr_err("irq event %d: bogus return value %x\n",
+  irq, action_ret);
+   } else {
+   pr_err("irq %d: nobody cared (try booting with "
"the \"irqpoll\" option)\n", irq);
+   }
}
dump_stack();
printk(KERN_ERR "handlers:\n");
@@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de

if (count > 0) {
count--;
-   __report_bad_irq(desc, action_ret);
+   __report_bad_irq(desc, action_ret, false);
}
 }

@@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru
return action && (action->flags & IRQF_IRQPOLL);
 }

+static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t 
action_ret,

+ const char *reason, bool storm)
+{
+   __report_bad_irq(desc, action_ret, storm);
+   pr_emerg("Disabling %s IRQ #%d\n", reason, irq_desc_get_irq(desc));
+   desc->istate |= IRQS_SPURIOUS_DISABLED;
+   desc->depth++;
+   irq_disable(desc);
+}
+
+/* Interrupt storm detector for runaway interrupts (handled or not). 
*/

+static bool irqstorm_detected(struct irq_desc *desc)
+{
+   unsigned long now = jiffies;
+
+   if (++desc->storm_count < irqstorm_limit) {
+   if (time_after(now, desc->storm_checked + HZ)) {
+   desc->storm_count = 0;
+   desc->storm_checked = now;
+   }
+   return false;
+   }
+
+   disable_stuck_irq(desc, IRQ_NONE, "runaway", true);
+   return true;
+}
+
 #define SPURIOUS_DEFERRED  0x8000

 void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
@@ 

Re: [PATCH 0/3] warn and suppress irqflood

2021-03-01 Thread Sai Prakash Ranjan
Hi Thomas,

> On Wed, Oct 28, 2020 at 12:58:41PM +0100, Thomas Gleixner wrote:
>

...

> Something like the completly untested below should work independent of
> config options.
> 
> Thanks,
> 
> tglx
> ---
>  include/linux/irqdesc.h |4 ++
>  kernel/irq/manage.c |3 +
>  kernel/irq/spurious.c   |   74 
> +++-
>  3 files changed, 61 insertions(+), 20 deletions(-)
> 
> --- a/include/linux/irqdesc.h
> +++ b/include/linux/irqdesc.h
> @@ -30,6 +30,8 @@ struct pt_regs;
>   * @tot_count:   stats field for non-percpu irqs
>   * @irq_count:   stats field to detect stalled irqs
>   * @last_unhandled:  aging timer for unhandled count
> + * @storm_count: Counter for irq storm detection
> + * @storm_checked:   Timestamp for irq storm detection
>   * @irqs_unhandled:  stats field for spurious unhandled interrupts
>   * @threads_handled: stats field for deferred spurious detection of threaded 
> handlers
>   * @threads_handled_last: comparator field for deferred spurious detection 
> of theraded handlers
> @@ -65,6 +67,8 @@ struct irq_desc {
>   unsigned inttot_count;
>   unsigned intirq_count;  /* For detecting broken IRQs */
>   unsigned long   last_unhandled; /* Aging timer for unhandled 
> count */
> + unsigned long   storm_count;
> + unsigned long   storm_checked;
>   unsigned intirqs_unhandled;
>   atomic_tthreads_handled;
>   int threads_handled_last;
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -1581,6 +1581,9 @@ static int
>   if (!shared) {
>   init_waitqueue_head(>wait_for_threads);
>  
> + /* Take a timestamp for interrupt storm detection */
> + desc->storm_checked = jiffies;
> +
>   /* Setup the type (level, edge polarity) if configured: */
>   if (new->flags & IRQF_TRIGGER_MASK) {
>   ret = __irq_set_trigger(desc,
> --- a/kernel/irq/spurious.c
> +++ b/kernel/irq/spurious.c
> @@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti
>  static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
>  static int irq_poll_cpu;
>  static atomic_t irq_poll_active;
> +static unsigned long irqstorm_limit __ro_after_init;
>  
>  /*
>   * We wait here for a poller to finish.
> @@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu
>   * (The other 100-of-100,000 interrupts may have been a correctly
>   *  functioning device sharing an IRQ with the failing one)
>   */
> -static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
> +static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret,
> +  bool storm)
>  {
>   unsigned int irq = irq_desc_get_irq(desc);
>   struct irqaction *action;
>   unsigned long flags;
>  
> - if (bad_action_ret(action_ret)) {
> - printk(KERN_ERR "irq event %d: bogus return value %x\n",
> - irq, action_ret);
> - } else {
> - printk(KERN_ERR "irq %d: nobody cared (try booting with "
> + if (!storm) {
> + if (bad_action_ret(action_ret)) {
> + pr_err("irq event %d: bogus return value %x\n",
> +irq, action_ret);
> + } else {
> + pr_err("irq %d: nobody cared (try booting with "
>   "the \"irqpoll\" option)\n", irq);
> + }
>   }
>   dump_stack();
>   printk(KERN_ERR "handlers:\n");
> @@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de
>  
>   if (count > 0) {
>   count--;
> - __report_bad_irq(desc, action_ret);
> + __report_bad_irq(desc, action_ret, false);
>   }
>  }
>  
> @@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru
>   return action && (action->flags & IRQF_IRQPOLL);
>  }
>  
> +static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t action_ret,
> +   const char *reason, bool storm)
> +{
> + __report_bad_irq(desc, action_ret, storm);
> + pr_emerg("Disabling %s IRQ #%d\n", reason, irq_desc_get_irq(desc));
> + desc->istate |= IRQS_SPURIOUS_DISABLED;
> + desc->depth++;
> + irq_disable(desc);
> +}
> +
> +/* Interrupt storm detector for runaway interrupts (handled or not). */
> +static bool irqstorm_detected(struct irq_desc *desc)
> +{
> + unsigned long now = jiffies;
> +
> + if (++desc->storm_count < irqstorm_limit) {
> + if (time_after(now, desc->storm_checked + HZ)) {
> + desc->storm_count = 0;
> + desc->storm_checked = now;
> + }
> + return false;
> + }
> +
> + disable_stuck_irq(desc, IRQ_NONE, "runaway", true);
> + return true;
> +}
> +
>  #define SPURIOUS_DEFERRED

Re: [PATCH 0/3] warn and suppress irqflood

2020-11-05 Thread Pingfan Liu
On Wed, Oct 28, 2020 at 7:58 PM Thomas Gleixner  wrote:
>
[...]
> ---
>  include/linux/irqdesc.h |4 ++
>  kernel/irq/manage.c |3 +
>  kernel/irq/spurious.c   |   74 
> +++-
>  3 files changed, 61 insertions(+), 20 deletions(-)
>
> --- a/include/linux/irqdesc.h
> +++ b/include/linux/irqdesc.h
> @@ -30,6 +30,8 @@ struct pt_regs;
>   * @tot_count: stats field for non-percpu irqs
>   * @irq_count: stats field to detect stalled irqs
>   * @last_unhandled:aging timer for unhandled count
> + * @storm_count:   Counter for irq storm detection
> + * @storm_checked: Timestamp for irq storm detection
>   * @irqs_unhandled:stats field for spurious unhandled interrupts
>   * @threads_handled:   stats field for deferred spurious detection of 
> threaded handlers
>   * @threads_handled_last: comparator field for deferred spurious detection 
> of theraded handlers
> @@ -65,6 +67,8 @@ struct irq_desc {
> unsigned inttot_count;
> unsigned intirq_count;  /* For detecting broken IRQs 
> */
> unsigned long   last_unhandled; /* Aging timer for unhandled 
> count */
> +   unsigned long   storm_count;
> +   unsigned long   storm_checked;
> unsigned intirqs_unhandled;
> atomic_tthreads_handled;
> int threads_handled_last;
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -1581,6 +1581,9 @@ static int
> if (!shared) {
> init_waitqueue_head(>wait_for_threads);
>
> +   /* Take a timestamp for interrupt storm detection */
> +   desc->storm_checked = jiffies;
> +
> /* Setup the type (level, edge polarity) if configured: */
> if (new->flags & IRQF_TRIGGER_MASK) {
> ret = __irq_set_trigger(desc,
> --- a/kernel/irq/spurious.c
> +++ b/kernel/irq/spurious.c
> @@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti
>  static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
>  static int irq_poll_cpu;
>  static atomic_t irq_poll_active;
> +static unsigned long irqstorm_limit __ro_after_init;
>
>  /*
>   * We wait here for a poller to finish.
> @@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu
>   * (The other 100-of-100,000 interrupts may have been a correctly
>   *  functioning device sharing an IRQ with the failing one)
>   */
> -static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
> +static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret,
> +bool storm)
>  {
> unsigned int irq = irq_desc_get_irq(desc);
> struct irqaction *action;
> unsigned long flags;
>
> -   if (bad_action_ret(action_ret)) {
> -   printk(KERN_ERR "irq event %d: bogus return value %x\n",
> -   irq, action_ret);
> -   } else {
> -   printk(KERN_ERR "irq %d: nobody cared (try booting with "
> +   if (!storm) {
> +   if (bad_action_ret(action_ret)) {
> +   pr_err("irq event %d: bogus return value %x\n",
> +  irq, action_ret);
> +   } else {
> +   pr_err("irq %d: nobody cared (try booting with "
> "the \"irqpoll\" option)\n", irq);
> +   }
> }
> dump_stack();
> printk(KERN_ERR "handlers:\n");
> @@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de
>
> if (count > 0) {
> count--;
> -   __report_bad_irq(desc, action_ret);
> +   __report_bad_irq(desc, action_ret, false);
> }
>  }
>
> @@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru
> return action && (action->flags & IRQF_IRQPOLL);
>  }
>
> +static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t action_ret,
> + const char *reason, bool storm)
> +{
> +   __report_bad_irq(desc, action_ret, storm);
> +   pr_emerg("Disabling %s IRQ #%d\n", reason, irq_desc_get_irq(desc));
> +   desc->istate |= IRQS_SPURIOUS_DISABLED;
> +   desc->depth++;
> +   irq_disable(desc);
> +}
> +
> +/* Interrupt storm detector for runaway interrupts (handled or not). */
> +static bool irqstorm_detected(struct irq_desc *desc)
> +{
> +   unsigned long now = jiffies;
> +
> +   if (++desc->storm_count < irqstorm_limit) {
> +   if (time_after(now, desc->storm_checked + HZ)) {
> +   desc->storm_count = 0;
> +   desc->storm_checked = now;
> +   }
> +   return false;
> +   }
> +
> +   disable_stuck_irq(desc, IRQ_NONE, "runaway", true);
> +   return true;
> +}
> +
>  #define SPURIOUS_DEFERRED  0x8000
>
>  void note_interrupt(struct 

Re: [PATCH 0/3] warn and suppress irqflood

2020-10-29 Thread Pingfan Liu
On Wed, Oct 28, 2020 at 12:58:41PM +0100, Thomas Gleixner wrote:
> On Wed, Oct 28 2020 at 14:02, Pingfan Liu wrote:
> > On Tue, Oct 27, 2020 at 3:59 AM Thomas Gleixner  wrote:
> >> Also Liu's patch only works if:
> >>
> >>   1) CONFIG_IRQ_TIME_ACCOUNTING is enabled
> >
> > I wonder whether it can not be a default option or not by the following 
> > method:
> >   DEFINE_STATIC_KEY_FALSE(irqtime_account), and enable it according to
> > a boot param.
> 
> How so?
> 
>   config IRQ_TIME_ACCOUNTING
>   depends on HAVE_IRQ_TIME_ACCOUNTING && 
> !VIRT_CPU_ACCOUNTING_NATIVE
> 
Look closely at the two config value:
-1. HAVE_IRQ_TIME_ACCOUNTING, it is selected by most of the popular arches, and
can be further relaxed.
   It implies sched_clock() is fast enough for sampling. With current code, the
variable sched_clock_irqtime=0 can be used to turn off irqtime accounting on
some arches with slow sched_clock(). And it can be even better by using
DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime)
   So the pre-requirement can be relaxed as "depends on 
!VIRT_CPU_ACCOUNTING_NATIVE"
In case that I can not express clearly, could you have a look at the demo patch?

   That patch _assumes_ that irqtime accounting costs much and is not turned on 
by
default. If turned on, it will cost an extra jmp than current implement.
And I think it is critical to my [1/3] whether this assumption is reasonable.

-2. For VIRT_CPU_ACCOUNTING_NATIVE, it can only be selected by powerpc and ia64

In fact, I have a seperate patch for powerpc with
CONFIG_VIRT_CPU_ACCOUNTING_NATIVE to utilize my [1/3].

---
diff --git a/init/Kconfig b/init/Kconfig
index c944691..16d168b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -490,7 +490,7 @@ endchoice
 
 config IRQ_TIME_ACCOUNTING
bool "Fine granularity task level IRQ time accounting"
-   depends on HAVE_IRQ_TIME_ACCOUNTING && !VIRT_CPU_ACCOUNTING_NATIVE
+   depends on !VIRT_CPU_ACCOUNTING_NATIVE
help
  Select this option to enable fine granularity task irq time
  accounting. This is done by reading a timestamp on each
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5a55d23..3ab7e1d 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -19,7 +19,7 @@
  */
 DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
 
-static int sched_clock_irqtime;
+DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
 
 void enable_sched_clock_irqtime(void)
 {
@@ -49,13 +49,14 @@ static void irqtime_account_delta(struct irqtime *irqtime, 
u64 delta,
  */
 void irqtime_account_irq(struct task_struct *curr)
 {
-   struct irqtime *irqtime = this_cpu_ptr(_irqtime);
+   struct irqtime *irqtime;
s64 delta;
int cpu;
 
-   if (!sched_clock_irqtime)
+   if (static_branch_unlikely(_clock_irqtime))
return;
 
+   irqtime = this_cpu_ptr(_irqtime);
cpu = smp_processor_id();
delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
irqtime->irq_start_time += delta;
@@ -84,16 +85,7 @@ static u64 irqtime_tick_accounted(u64 maxtime)
return delta;
 }
 
-#else /* CONFIG_IRQ_TIME_ACCOUNTING */
-
-#define sched_clock_irqtime(0)
-
-static u64 irqtime_tick_accounted(u64 dummy)
-{
-   return 0;
-}
-
-#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
+#endif
 
 static inline void task_group_account_field(struct task_struct *p, int index,
u64 tmp)
@@ -475,7 +467,7 @@ void account_process_tick(struct task_struct *p, int 
user_tick)
if (vtime_accounting_enabled_this_cpu())
return;
 
-   if (sched_clock_irqtime) {
+   if (static_branch_unlikely(_clock_irqtime))
irqtime_account_process_tick(p, user_tick, 1);
return;
}
@@ -504,7 +496,7 @@ void account_idle_ticks(unsigned long ticks)
 {
u64 cputime, steal;
 
-   if (sched_clock_irqtime) {
+   if (static_branch_unlikely(_clock_irqtime))
irqtime_account_idle_ticks(ticks);
return;
}
-- 
2.7.5

[...]
> +
> +static int __init irqstorm_setup(char *arg)
> +{
> + int res = kstrtoul(arg, 0, _limit);
> +
> + if (!res) {
> + pr_info("Interrupt storm detector enabled. Limit=%lu / s\n",
> + irqstorm_limit);
> + }
> + return !!res;
> +}
> +__setup("irqstorm_limit", irqstorm_setup);

This configuration independent method looks appealing. And I am glad to have a 
try.

But irqstorm_limit may be a hard choice. Maybe by formula:
instruction-percpu-per-second / insn num of irq failed path ?  It is hard to
estimate "instruction-percpu-per-second".

Thanks,
Pingfan

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] warn and suppress irqflood

2020-10-28 Thread Thomas Gleixner
On Wed, Oct 28 2020 at 14:02, Pingfan Liu wrote:
> On Tue, Oct 27, 2020 at 3:59 AM Thomas Gleixner  wrote:
>> Also Liu's patch only works if:
>>
>>   1) CONFIG_IRQ_TIME_ACCOUNTING is enabled
>
> I wonder whether it can not be a default option or not by the following 
> method:
>   DEFINE_STATIC_KEY_FALSE(irqtime_account), and enable it according to
> a boot param.

How so?

config IRQ_TIME_ACCOUNTING
depends on HAVE_IRQ_TIME_ACCOUNTING && 
!VIRT_CPU_ACCOUNTING_NATIVE

> This will have no impact on performance with the disabled branch.
> Meanwhile users can easily turn on the option to detect an irq flood
> without  recompiling the kernel.
>
> If it is doable, I will rework only on [1/2].

See above :)

>>   2) the runaway interrupt has been requested by the relevant driver in
>>  the dump kernel.
>
> Yes, it raises a big challenge to my method. Kdump kernel miss the
> whole picture of the first kernel's irq routing.

Correct. If there is anything stale then you get what Guilherme
observed. But the irq core can do nothing about that.

Something like the completly untested below should work independent of
config options.

Thanks,

tglx
---
 include/linux/irqdesc.h |4 ++
 kernel/irq/manage.c |3 +
 kernel/irq/spurious.c   |   74 +++-
 3 files changed, 61 insertions(+), 20 deletions(-)

--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -30,6 +30,8 @@ struct pt_regs;
  * @tot_count: stats field for non-percpu irqs
  * @irq_count: stats field to detect stalled irqs
  * @last_unhandled:aging timer for unhandled count
+ * @storm_count:   Counter for irq storm detection
+ * @storm_checked: Timestamp for irq storm detection
  * @irqs_unhandled:stats field for spurious unhandled interrupts
  * @threads_handled:   stats field for deferred spurious detection of threaded 
handlers
  * @threads_handled_last: comparator field for deferred spurious detection of 
theraded handlers
@@ -65,6 +67,8 @@ struct irq_desc {
unsigned inttot_count;
unsigned intirq_count;  /* For detecting broken IRQs */
unsigned long   last_unhandled; /* Aging timer for unhandled 
count */
+   unsigned long   storm_count;
+   unsigned long   storm_checked;
unsigned intirqs_unhandled;
atomic_tthreads_handled;
int threads_handled_last;
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1581,6 +1581,9 @@ static int
if (!shared) {
init_waitqueue_head(>wait_for_threads);
 
+   /* Take a timestamp for interrupt storm detection */
+   desc->storm_checked = jiffies;
+
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
ret = __irq_set_trigger(desc,
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -21,6 +21,7 @@ static void poll_spurious_irqs(struct ti
 static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
 static int irq_poll_cpu;
 static atomic_t irq_poll_active;
+static unsigned long irqstorm_limit __ro_after_init;
 
 /*
  * We wait here for a poller to finish.
@@ -189,18 +190,21 @@ static inline int bad_action_ret(irqretu
  * (The other 100-of-100,000 interrupts may have been a correctly
  *  functioning device sharing an IRQ with the failing one)
  */
-static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
+static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret,
+bool storm)
 {
unsigned int irq = irq_desc_get_irq(desc);
struct irqaction *action;
unsigned long flags;
 
-   if (bad_action_ret(action_ret)) {
-   printk(KERN_ERR "irq event %d: bogus return value %x\n",
-   irq, action_ret);
-   } else {
-   printk(KERN_ERR "irq %d: nobody cared (try booting with "
+   if (!storm) {
+   if (bad_action_ret(action_ret)) {
+   pr_err("irq event %d: bogus return value %x\n",
+  irq, action_ret);
+   } else {
+   pr_err("irq %d: nobody cared (try booting with "
"the \"irqpoll\" option)\n", irq);
+   }
}
dump_stack();
printk(KERN_ERR "handlers:\n");
@@ -228,7 +232,7 @@ static void report_bad_irq(struct irq_de
 
if (count > 0) {
count--;
-   __report_bad_irq(desc, action_ret);
+   __report_bad_irq(desc, action_ret, false);
}
 }
 
@@ -267,6 +271,33 @@ try_misrouted_irq(unsigned int irq, stru
return action && (action->flags & IRQF_IRQPOLL);
 }
 
+static void disable_stuck_irq(struct irq_desc *desc, irqreturn_t action_ret,
+

Re: [PATCH 0/3] warn and suppress irqflood

2020-10-28 Thread Pingfan Liu
On Tue, Oct 27, 2020 at 3:59 AM Thomas Gleixner  wrote:
>
[...]
>
> And contrary to Liu's patches which try to disable a requested interrupt
> if too many of them arrive, the kernel cannot do anything because there
> is nothing to disable in your case. That's why you needed to do the MSI
> disable magic in the early PCI quirks which run before interrupts get
> enabled.
>
> Also Liu's patch only works if:
>
>   1) CONFIG_IRQ_TIME_ACCOUNTING is enabled

I wonder whether it can not be a default option or not by the following method:
  DEFINE_STATIC_KEY_FALSE(irqtime_account), and enable it according to
a boot param.

This will have no impact on performance with the disabled branch.
Meanwhile users can easily turn on the option to detect an irq flood
without  recompiling the kernel.

If it is doable, I will rework only on [1/2].
>
>   2) the runaway interrupt has been requested by the relevant driver in
>  the dump kernel.

Yes, it raises a big challenge to my method. Kdump kernel miss the
whole picture of the first kernel's irq routing.

Thanks,
Pingfan

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] warn and suppress irqflood

2020-10-26 Thread Thomas Gleixner
On Mon, Oct 26 2020 at 17:28, Guilherme Piccoli wrote:
> On Mon, Oct 26, 2020 at 4:59 PM Thomas Gleixner  wrote:
>> It gets flooded right at the point where the crash kernel enables
>> interrupts in start_kernel(). At that point there is no device driver
>> and no interupt requested. All you can see on the console for this is
>>
>>  "common_interrupt: $VECTOR.$CPU No irq handler for vector"
>>
>> And contrary to Liu's patches which try to disable a requested interrupt
>> if too many of them arrive, the kernel cannot do anything because there
>> is nothing to disable in your case. That's why you needed to do the MSI
>> disable magic in the early PCI quirks which run before interrupts get
>> enabled.
>
> Wow, thank you very much for this great explanation (without a
> reproducer) - it's nice to hear somebody that deeply understands the
> code! And double thanks for CCing Bjorn.

Understanding the code is only half of the picture. You need to
understand how the hardware works or not :)

> So, I don't want to hijack Liu's thread, but do you think it makes
> sense to have my approach as a (debug) parameter to prevent such a
> degenerate case?

At least it makes sense to some extent even if it's incomplete. What
bothers me is that it'd be x86 specific while the issue is pretty much
architecture independent. I don't think that the APIC is special in that
regard. Rogue MSIs should be able to bring down pretty much all
architectures.

> Or could we have something in core IRQ code to prevent irq flooding in
> such scenarios, something "stronger" than disabling MSIs (APIC-level,
> likely)?

For your case? No. The APIC cannot be protected against rogue MSIs. The
only cure is to disable interrupts or disable MSIs on all PCI[E] devices
early on. Disabling interrupts is not so much of an option obviously :)

Thanks,

tglx





___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] warn and suppress irqflood

2020-10-26 Thread Guilherme Piccoli
On Mon, Oct 26, 2020 at 4:59 PM Thomas Gleixner  wrote:
>
> On Mon, Oct 26 2020 at 12:06, Guilherme Piccoli wrote:
> > On Sun, Oct 25, 2020 at 8:12 AM Pingfan Liu  wrote:
> >
> > Some time ago (2 years) we faced a similar issue in x86-64, a hard to
> > debug problem in kdump, that eventually was narrowed to a buggy NIC FW
> > flooding IRQs in kdump kernel, and no messages showed (although kernel
> > changed a lot since that time, today we might have better IRQ
> > handling/warning). We tried an early-boot fix, by disabling MSIs (as
> > per PCI spec) early in x86 boot, but it wasn't accepted - Bjorn asked
> > pertinent questions that I couldn't respond (I lost the reproducer)
> > [0].
> ...
> > [0] lore.kernel.org/linux-pci/20181018183721.27467-1-gpicc...@canonical.com
>
> With that broken firmware the NIC continued to send MSI messages to the
> vector/CPU which was assigned to it before the crash. But the crash
> kernel has no interrupt descriptor for this vector installed. So Liu's
> patches wont print anything simply because the interrupt core cannot
> detect it.
>
> To answer Bjorns still open question about when the point X is:
>
>   
> https://lore.kernel.org/linux-pci/20181023170343.ga4...@bhelgaas-glaptop.roam.corp.google.com/
>
> It gets flooded right at the point where the crash kernel enables
> interrupts in start_kernel(). At that point there is no device driver
> and no interupt requested. All you can see on the console for this is
>
>  "common_interrupt: $VECTOR.$CPU No irq handler for vector"
>
> And contrary to Liu's patches which try to disable a requested interrupt
> if too many of them arrive, the kernel cannot do anything because there
> is nothing to disable in your case. That's why you needed to do the MSI
> disable magic in the early PCI quirks which run before interrupts get
> enabled.
>
> Also Liu's patch only works if:
>
>   1) CONFIG_IRQ_TIME_ACCOUNTING is enabled
>
>   2) the runaway interrupt has been requested by the relevant driver in
>  the dump kernel.
>
> Especially #1 is not a sensible restriction.
>
> Thanks,
>
> tglx

Wow, thank you very much for this great explanation (without a
reproducer) - it's nice to hear somebody that deeply understands the
code! And double thanks for CCing Bjorn.

So, I don't want to hijack Liu's thread, but do you think it makes
sense to have my approach as a (debug) parameter to prevent such a
degenerate case? Or could we have something in core IRQ code to
prevent irq flooding in such scenarios, something "stronger" than
disabling MSIs (APIC-level, likely)?

Cheers,


Guilherme

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] warn and suppress irqflood

2020-10-26 Thread Thomas Gleixner
On Mon, Oct 26 2020 at 12:06, Guilherme Piccoli wrote:
> On Sun, Oct 25, 2020 at 8:12 AM Pingfan Liu  wrote:
>
> Some time ago (2 years) we faced a similar issue in x86-64, a hard to
> debug problem in kdump, that eventually was narrowed to a buggy NIC FW
> flooding IRQs in kdump kernel, and no messages showed (although kernel
> changed a lot since that time, today we might have better IRQ
> handling/warning). We tried an early-boot fix, by disabling MSIs (as
> per PCI spec) early in x86 boot, but it wasn't accepted - Bjorn asked
> pertinent questions that I couldn't respond (I lost the reproducer)
> [0].
...
> [0] lore.kernel.org/linux-pci/20181018183721.27467-1-gpicc...@canonical.com

With that broken firmware the NIC continued to send MSI messages to the
vector/CPU which was assigned to it before the crash. But the crash
kernel has no interrupt descriptor for this vector installed. So Liu's
patches wont print anything simply because the interrupt core cannot
detect it.

To answer Bjorns still open question about when the point X is:

  
https://lore.kernel.org/linux-pci/20181023170343.ga4...@bhelgaas-glaptop.roam.corp.google.com/

It gets flooded right at the point where the crash kernel enables
interrupts in start_kernel(). At that point there is no device driver
and no interupt requested. All you can see on the console for this is

 "common_interrupt: $VECTOR.$CPU No irq handler for vector"

And contrary to Liu's patches which try to disable a requested interrupt
if too many of them arrive, the kernel cannot do anything because there
is nothing to disable in your case. That's why you needed to do the MSI
disable magic in the early PCI quirks which run before interrupts get
enabled.

Also Liu's patch only works if:

  1) CONFIG_IRQ_TIME_ACCOUNTING is enabled

  2) the runaway interrupt has been requested by the relevant driver in
 the dump kernel.

Especially #1 is not a sensible restriction.

Thanks,

tglx



___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] warn and suppress irqflood

2020-10-26 Thread Guilherme Piccoli
On Sun, Oct 25, 2020 at 8:12 AM Pingfan Liu  wrote:
>
> On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner  wrote:
> >
> > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote:
> > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 
> > > platform.
> > > When the bug happens, the kernel is totally occupies by irq.  Currently, 
> > > there
> > > may be nothing or just soft lockup warning showed in console. It is better
> > > to warn users with irq flood info.
> > >
> > > In the kdump case, the kernel can move on by suppressing the irq flood.
> >
> > You're curing the symptom not the cause and the cure is just magic and
> > can't work reliably.
> Yeah, it is magic. But at least, it is better to printk something and
> alarm users about what happens. With current code, it may show nothing
> when system hangs.

Thanks Pingfan and Thomas for the points - I'd like to have a
mechanism in the kernel to warn users when an IRQ flood is potentially
happening.
Some time ago (2 years) we faced a similar issue in x86-64, a hard to
debug problem in kdump, that eventually was narrowed to a buggy NIC FW
flooding IRQs in kdump kernel, and no messages showed (although kernel
changed a lot since that time, today we might have better IRQ
handling/warning). We tried an early-boot fix, by disabling MSIs (as
per PCI spec) early in x86 boot, but it wasn't accepted - Bjorn asked
pertinent questions that I couldn't respond (I lost the reproducer)
[0].

Cheers,


Guilherme

[0] lore.kernel.org/linux-pci/20181018183721.27467-1-gpicc...@canonical.com

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [Skiboot] [PATCH 0/3] warn and suppress irqflood

2020-10-25 Thread Oliver O'Halloran
On Mon, Oct 26, 2020 at 12:11 AM Pingfan Liu  wrote:
>
> On Sun, Oct 25, 2020 at 8:21 PM Oliver O'Halloran  wrote:
> >
> > On Sun, Oct 25, 2020 at 10:22 PM Pingfan Liu  wrote:
> > >
> > > On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner  
> > > wrote:
> > > >
> > > > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote:
> > > > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 
> > > > > platform.
> > > > > When the bug happens, the kernel is totally occupies by irq.  
> > > > > Currently, there
> > > > > may be nothing or just soft lockup warning showed in console. It is 
> > > > > better
> > > > > to warn users with irq flood info.
> > > > >
> > > > > In the kdump case, the kernel can move on by suppressing the irq 
> > > > > flood.
> > > >
> > > > You're curing the symptom not the cause and the cure is just magic and
> > > > can't work reliably.
> > > Yeah, it is magic. But at least, it is better to printk something and
> > > alarm users about what happens. With current code, it may show nothing
> > > when system hangs.
> > > >
> > > > Where is that irq flood originated from and why is none of the
> > > > mechanisms we have in place to shut it up working?
> > > The bug originates from a driver tpm_i2c_nuvoton, which calls i2c-bus
> > > driver (i2c-opal.c). After i2c_opal_send_request(), the bug is
> > > triggered.
> > >
> > > But things are complicated by introducing a firmware layer: Skiboot.
> > > This software layer hides the detail of manipulating the hardware from
> > > Linux.
> > >
> > > I guess the software logic can not enter a sane state when kernel crashes.
> > >
> > > Cc Skiboot and ppc64 community to see whether anyone has idea about it.
> >
> > What system are you using?
>
> Here is the info, if not enough, I will get more.
>  Product Name  : OpenPOWER Firmware
>  Product Version   : open-power-SUPERMICRO-P9DSU-V1.16-20180531-imp
>  Product Extra : op-build-e4b3eb5
>  Product Extra : skiboot-v6.0-p1da203b
>  Product Extra : hostboot-f911e5c-pda8239f
>  Product Extra : occ-77bb5e6-p623d1cd
>  Product Extra : linux-4.16.7-openpower2-pbc45895
>  Product Extra : petitboot-v1.7.1-pf773c0d
>  Product Extra : machine-xml-218a77a

Unfortunately I don't have a schematic for that one.

> > There's an external interrupt pin which is supposed to be wired to the
> > TPM. I think we bounce that interrupt to FW by default since the
> > external interrupt is sometimes used for other system-specific
> > purposes. Odds are FW doesn't know what to do with it so you
> > effectively have an always-on LSI. I fixed a similar bug a while ago
> > by having skiboot mask any interrupts it doesn't have a handler for,
>
> This sounds like the root cause. But here Skiboot should have handler,
> otherwise the first kernel can not run smoothly.

I don't know why the TPM interrupt is asserted. If the TPM driver is
polling for a response it might clear the underlying condition as a
side effect of it's normal operation.

> Do you have any idea about an unexpected re-initialization introducing
> an unsane stage?

No idea, but those TPMs have a history of bricking themselves if you
do anything slightly odd to them. It wouldn't surprise me if the
re-probe can cause issues.

> Thanks,
> Pingfan

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [Skiboot] [PATCH 0/3] warn and suppress irqflood

2020-10-25 Thread Pingfan Liu
On Sun, Oct 25, 2020 at 8:21 PM Oliver O'Halloran  wrote:
>
> On Sun, Oct 25, 2020 at 10:22 PM Pingfan Liu  wrote:
> >
> > On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner  wrote:
> > >
> > > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote:
> > > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 
> > > > platform.
> > > > When the bug happens, the kernel is totally occupies by irq.  
> > > > Currently, there
> > > > may be nothing or just soft lockup warning showed in console. It is 
> > > > better
> > > > to warn users with irq flood info.
> > > >
> > > > In the kdump case, the kernel can move on by suppressing the irq flood.
> > >
> > > You're curing the symptom not the cause and the cure is just magic and
> > > can't work reliably.
> > Yeah, it is magic. But at least, it is better to printk something and
> > alarm users about what happens. With current code, it may show nothing
> > when system hangs.
> > >
> > > Where is that irq flood originated from and why is none of the
> > > mechanisms we have in place to shut it up working?
> > The bug originates from a driver tpm_i2c_nuvoton, which calls i2c-bus
> > driver (i2c-opal.c). After i2c_opal_send_request(), the bug is
> > triggered.
> >
> > But things are complicated by introducing a firmware layer: Skiboot.
> > This software layer hides the detail of manipulating the hardware from
> > Linux.
> >
> > I guess the software logic can not enter a sane state when kernel crashes.
> >
> > Cc Skiboot and ppc64 community to see whether anyone has idea about it.
>
> What system are you using?

Here is the info, if not enough, I will get more.
 Product Name  : OpenPOWER Firmware
 Product Version   : open-power-SUPERMICRO-P9DSU-V1.16-20180531-imp
 Product Extra : op-build-e4b3eb5
 Product Extra : skiboot-v6.0-p1da203b
 Product Extra : hostboot-f911e5c-pda8239f
 Product Extra : occ-77bb5e6-p623d1cd
 Product Extra : linux-4.16.7-openpower2-pbc45895
 Product Extra : petitboot-v1.7.1-pf773c0d
 Product Extra : machine-xml-218a77a

>
> There's an external interrupt pin which is supposed to be wired to the
> TPM. I think we bounce that interrupt to FW by default since the
> external interrupt is sometimes used for other system-specific
> purposes. Odds are FW doesn't know what to do with it so you
> effectively have an always-on LSI. I fixed a similar bug a while ago
> by having skiboot mask any interrupts it doesn't have a handler for,
This sounds like the root cause. But here Skiboot should have handler,
otherwise the first kernel can not run smoothly.

Do you have any idea about an unexpected re-initialization introducing
an unsane stage?

Thanks,
Pingfan

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [Skiboot] [PATCH 0/3] warn and suppress irqflood

2020-10-25 Thread Oliver O'Halloran
On Sun, Oct 25, 2020 at 10:22 PM Pingfan Liu  wrote:
>
> On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner  wrote:
> >
> > On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote:
> > > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 
> > > platform.
> > > When the bug happens, the kernel is totally occupies by irq.  Currently, 
> > > there
> > > may be nothing or just soft lockup warning showed in console. It is better
> > > to warn users with irq flood info.
> > >
> > > In the kdump case, the kernel can move on by suppressing the irq flood.
> >
> > You're curing the symptom not the cause and the cure is just magic and
> > can't work reliably.
> Yeah, it is magic. But at least, it is better to printk something and
> alarm users about what happens. With current code, it may show nothing
> when system hangs.
> >
> > Where is that irq flood originated from and why is none of the
> > mechanisms we have in place to shut it up working?
> The bug originates from a driver tpm_i2c_nuvoton, which calls i2c-bus
> driver (i2c-opal.c). After i2c_opal_send_request(), the bug is
> triggered.
>
> But things are complicated by introducing a firmware layer: Skiboot.
> This software layer hides the detail of manipulating the hardware from
> Linux.
>
> I guess the software logic can not enter a sane state when kernel crashes.
>
> Cc Skiboot and ppc64 community to see whether anyone has idea about it.

What system are you using?

There's an external interrupt pin which is supposed to be wired to the
TPM. I think we bounce that interrupt to FW by default since the
external interrupt is sometimes used for other system-specific
purposes. Odds are FW doesn't know what to do with it so you
effectively have an always-on LSI. I fixed a similar bug a while ago
by having skiboot mask any interrupts it doesn't have a handler for,
but I have no idea when that fix will land in a released FW build. Oh
well.

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] warn and suppress irqflood

2020-10-25 Thread Pingfan Liu
On Thu, Oct 22, 2020 at 4:37 PM Thomas Gleixner  wrote:
>
> On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote:
> > I hit a irqflood bug on powerpc platform, and two years ago, on a x86 
> > platform.
> > When the bug happens, the kernel is totally occupies by irq.  Currently, 
> > there
> > may be nothing or just soft lockup warning showed in console. It is better
> > to warn users with irq flood info.
> >
> > In the kdump case, the kernel can move on by suppressing the irq flood.
>
> You're curing the symptom not the cause and the cure is just magic and
> can't work reliably.
Yeah, it is magic. But at least, it is better to printk something and
alarm users about what happens. With current code, it may show nothing
when system hangs.
>
> Where is that irq flood originated from and why is none of the
> mechanisms we have in place to shut it up working?
The bug originates from a driver tpm_i2c_nuvoton, which calls i2c-bus
driver (i2c-opal.c). After i2c_opal_send_request(), the bug is
triggered.

But things are complicated by introducing a firmware layer: Skiboot.
This software layer hides the detail of manipulating the hardware from
Linux.

I guess the software logic can not enter a sane state when kernel crashes.

Cc Skiboot and ppc64 community to see whether anyone has idea about it.

Thanks,
Pingfan

___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 0/3] warn and suppress irqflood

2020-10-22 Thread Thomas Gleixner
On Thu, Oct 22 2020 at 13:56, Pingfan Liu wrote:
> I hit a irqflood bug on powerpc platform, and two years ago, on a x86 
> platform.
> When the bug happens, the kernel is totally occupies by irq.  Currently, there
> may be nothing or just soft lockup warning showed in console. It is better
> to warn users with irq flood info.
>
> In the kdump case, the kernel can move on by suppressing the irq flood.

You're curing the symptom not the cause and the cure is just magic and
can't work reliably.

Where is that irq flood originated from and why is none of the
mechanisms we have in place to shut it up working?

Thanks,

tglx





___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH 0/3] warn and suppress irqflood

2020-10-21 Thread Pingfan Liu
I hit a irqflood bug on powerpc platform, and two years ago, on a x86 platform.
When the bug happens, the kernel is totally occupies by irq.  Currently, there
may be nothing or just soft lockup warning showed in console. It is better
to warn users with irq flood info.

In the kdump case, the kernel can move on by suppressing the irq flood.

Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: Jisheng Zhang 
Cc: Andrew Morton 
Cc: "Guilherme G. Piccoli" 
Cc: Petr Mladek 
Cc: Marc Zyngier 
Cc: Linus Walleij 
Cc: afzal mohammed 
Cc: Lina Iyer 
Cc: "Gustavo A. R. Silva" 
Cc: Maulik Shah 
Cc: Al Viro 
Cc: Jonathan Corbet 
Cc: Pawan Gupta 
Cc: Mike Kravetz 
Cc: Oliver Neukum 
To: linux-ker...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: kexec@lists.infradead.org

Pingfan Liu (3):
  kernel/watchdog: show irq percentage if irq floods
  kernel/watchdog: suppress max irq when irq floods
  Documentation: introduce a param "irqflood_suppress"

 Documentation/admin-guide/kernel-parameters.txt |  3 ++
 include/linux/irq.h |  2 ++
 kernel/irq/spurious.c   | 32 +
 kernel/watchdog.c   | 48 +
 4 files changed, 85 insertions(+)

-- 
2.7.5


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec