Some machine-wide activities can stall all CPUs long enough to trigger
spurious softlockup watchdog warnings, so add a mechanism to disable
the watchdog on every CPU at once.

The most obvious activity is suspend/resume, but long sysrq output can
also stall the system long enough to cause problems.
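
Callers bracket the slow operation with the new global pair, as the
sysrq and suspend/resume hunks below do.  A minimal usage sketch
(do_slow_global_work() is only a placeholder for the stalling
operation, not something this patch adds):

	softlockup_global_disable();	/* watchdog off on every CPU */
	do_slow_global_work();		/* e.g. dump every task's state */
	softlockup_global_enable();	/* CPUs refresh timestamps on next tick */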

Signed-off-by: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
Cc: Ingo Molnar <[EMAIL PROTECTED]>
Cc: Prarit Bhargava <[EMAIL PROTECTED]>
Cc: Chris Lalancette <[EMAIL PROTECTED]>
Cc: Eric Dumazet <[EMAIL PROTECTED]>

---
 drivers/char/sysrq.c  |    8 +++++
 include/linux/sched.h |   10 ++++++
 kernel/panic.c        |    3 +-
 kernel/power/swsusp.c |    3 +-
 kernel/softlockup.c   |   72 ++++++++++++++++++++++++++++++++++++++++++-------
 kernel/timer.c        |    4 ++
 6 files changed, 87 insertions(+), 13 deletions(-)

===================================================================
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -211,7 +211,11 @@ static struct sysrq_key_op sysrq_showreg
 
 static void sysrq_handle_showstate(int key, struct tty_struct *tty)
 {
+       softlockup_global_disable();  /* may take a while */
+
        show_state();
+
+       softlockup_global_enable();
 }
 static struct sysrq_key_op sysrq_showstate_op = {
        .handler        = sysrq_handle_showstate,
@@ -222,7 +226,11 @@ static struct sysrq_key_op sysrq_showsta
 
 static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty)
 {
+       softlockup_global_disable();  /* may take a while */
+
        show_state_filter(TASK_UNINTERRUPTIBLE);
+
+       softlockup_global_enable();
 }
 static struct sysrq_key_op sysrq_showstate_blocked_op = {
        .handler        = sysrq_handle_showstate_blocked,
===================================================================
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -243,6 +243,10 @@ extern int  softlockup_disable(void);
 extern int  softlockup_disable(void);
 extern void softlockup_enable(int state);
 
+/* Disable/re-enable softlockup watchdog on all CPUs */
+extern void softlockup_global_disable(void);
+extern void softlockup_global_enable(void);
+
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
 #else
@@ -263,6 +267,12 @@ static inline void softlockup_enable(int
 static inline void softlockup_enable(int state)
 {
        preempt_enable();
+}
+static inline void softlockup_global_enable(void)
+{
+}
+static inline void softlockup_global_disable(void)
+{
 }
 static inline void spawn_softlockup_task(void)
 {
===================================================================
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -131,8 +131,9 @@ NORET_TYPE void panic(const char * fmt, 
         disabled_wait(caller);
 #endif
        local_irq_enable();
+       softlockup_global_disable();
+
        for (i = 0;;) {
-               touch_softlockup_watchdog();
                i += panic_blink(i);
                mdelay(1);
                i++;
===================================================================
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -289,6 +289,7 @@ int swsusp_suspend(void)
         * that suspended with irqs off ... no overall powerup.
         */
        device_power_up();
+       softlockup_global_disable();
  Enable_irqs:
        local_irq_enable();
        return error;
@@ -323,7 +324,7 @@ int swsusp_resume(void)
         */
        swsusp_free();
        restore_processor_state();
-       touch_softlockup_watchdog();
+       softlockup_global_enable();
        device_power_up();
        local_irq_enable();
        return error;
===================================================================
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -17,10 +17,21 @@
 
 static DEFINE_SPINLOCK(print_lock);
 
+/*
+ * Since sched_clock() is inherently per-cpu, it's not possible to
+ * update another CPU's timestamp.  To deal with this, we add an extra
+ * state meaning "enabled, but timestamp needs update".
+ */
+enum state {
+       SL_OFF = 0,             /* disabled */
+       SL_UPDATE,              /* enabled, but timestamp old */
+       SL_ON,                  /* enabled */
+};
+
 static DEFINE_PER_CPU(unsigned long, touch_timestamp);
 static DEFINE_PER_CPU(unsigned long, print_timestamp);
 static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
-static DEFINE_PER_CPU(int, enabled);
+static DEFINE_PER_CPU(enum state, softlock_state);
 
 static int did_panic = 0;
 
@@ -48,7 +59,12 @@ static unsigned long get_timestamp(void)
 
 void inline touch_softlockup_watchdog(void)
 {
+       if (__raw_get_cpu_var(softlock_state) == SL_OFF)
+               return;
+
        __raw_get_cpu_var(touch_timestamp) = get_timestamp();
+       barrier();      /* order the timestamp update before SL_ON */
+       __raw_get_cpu_var(softlock_state) = SL_ON;
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
@@ -58,7 +74,7 @@ EXPORT_SYMBOL(touch_softlockup_watchdog)
  */
 void inline softlockup_tick_disable(void)
 {
-       __get_cpu_var(enabled) = 0;
+       __get_cpu_var(softlock_state) = SL_OFF;
 }
 
 /*
@@ -73,7 +89,7 @@ int softlockup_disable(void)
 
        preempt_disable();
 
-       ret = __get_cpu_var(enabled);
+       ret = __get_cpu_var(softlock_state) != SL_OFF;
        softlockup_tick_disable();
 
        return ret;
@@ -86,7 +102,7 @@ EXPORT_SYMBOL(softlockup_disable);
  */
 void inline softlockup_tick_enable(void)
 {
-       __get_cpu_var(enabled) = 1;
+       __get_cpu_var(softlock_state) = SL_UPDATE;
 }
 
 /*
@@ -96,15 +112,41 @@ void softlockup_enable(int state)
 void softlockup_enable(int state)
 {
        if (state) {
-               touch_softlockup_watchdog();
-               /* update timestamp before enable */
-               barrier();
                softlockup_tick_enable();
+               touch_softlockup_watchdog();
        }
 
        preempt_enable();
 }
 EXPORT_SYMBOL(softlockup_enable);
+
+/*
+ * Disable softlockup watchdog on all CPUs.  This is useful for
+ * globally disruptive activities, like suspend/resume or large sysrq
+ * debug outputs.
+ */
+void softlockup_global_disable(void)
+{
+       unsigned int cpu;
+
+       for_each_online_cpu(cpu)
+               per_cpu(softlock_state, cpu) = SL_OFF;
+}
+EXPORT_SYMBOL(softlockup_global_disable);
+
+/*
+ * Globally re-enable the softlockup watchdog.  This overrides any
+ * CPU-local softlockup_disable() still in effect, so the two are not
+ * expected to be used concurrently.
+ */
+void softlockup_global_enable(void)
+{
+       unsigned int cpu;
+
+       for_each_online_cpu(cpu)
+               per_cpu(softlock_state, cpu) = SL_UPDATE;
+}
+EXPORT_SYMBOL(softlockup_global_enable);
 
 /*
  * This callback runs from the timer interrupt, and checks
@@ -117,9 +159,19 @@ void softlockup_tick(void)
        unsigned long print_timestamp;
        unsigned long now;
 
-       /* return if not enabled */
-       if (!__get_cpu_var(enabled))
-               return;
+       switch (__get_cpu_var(softlock_state)) {
+       case SL_OFF:
+               /* not enabled */
+               return;
+
+       case SL_UPDATE:
+               /* update timestamp */
+               touch_softlockup_watchdog();
+               return;
+
+       case SL_ON:
+               break;
+       }
 
        print_timestamp = __get_cpu_var(print_timestamp);
 
===================================================================
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1011,7 +1011,7 @@ static int timekeeping_resume(struct sys
        timekeeping_suspended = 0;
        write_sequnlock_irqrestore(&xtime_lock, flags);
 
-       touch_softlockup_watchdog();
+       softlockup_global_enable();
 
        clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
 
@@ -1029,6 +1029,8 @@ static int timekeeping_suspend(struct sy
        timekeeping_suspended = 1;
        timekeeping_suspend_time = read_persistent_clock();
        write_sequnlock_irqrestore(&xtime_lock, flags);
+
+       softlockup_global_disable();
 
        clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
 
