This patch fixes one of the problems reported by Daniel Walker
(https://lkml.org/lkml/2015/6/24/44).

If "crash_kexec_post_notifiers" boot option is specified,
other cpus are stopped by smp_send_stop() before entering
crash_kexec(), while usually machine_crash_shutdown() called by
crash_kexec() does that.  This behavior change leads two problems.

 Problem 1:
 Some functions in the crash_kexec() path depend on other cpus being
 still online.  If other cpus have been offlined already, they
 doesn't work properly.

  Example (MIPS OCTEON case):
   panic()
    crash_kexec()
     machine_crash_shutdown()
      octeon_generic_shutdown() // shutdown watchdog for ONLINE cpus
     machine_kexec()

 Problem 2:
 Most of architectures stop other cpus in the machine_crash_shutdown()
 path and save register information at that time.  However, if
 smp_send_stop() is called before that, we can't save the register
 information.

This patch solves the problem 2 by replacing smp_send_stop() in
panic() with panic_smp_stop_cpus() which is a weak function and can be
replaced with suitable version for crash_kexec context.  In fact,
x86 replaces it with a function based on kdump_nmi_shootdown_cpus() to
stop other cpus and save their states.

Please note that crash_kexec() can be called directly without
entering panic().  A stop-other-cpus procedure is still needed
by crash_kexec().

Changes in V2:
- Replace smp_send_stop() call with crash_kexec version which
  saves cpu states and cleans up VMX/SVM
- Drop a fix for Problem 1 at this moment

Reported-by: Daniel Walker <[email protected]>
Fixes: f06e5153f4ae (kernel/panic.c: add "crash_kexec_post_notifiers" option
Signed-off-by: Hidehiro Kawai <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Eric Biederman <[email protected]>
Cc: Vivek Goyal <[email protected]>
---
 arch/x86/kernel/crash.c |   16 +++++++++++-----
 kernel/panic.c          |   29 +++++++++++++++++++++++------
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e068d66..913c621 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -130,16 +130,22 @@ static void kdump_nmi_callback(int cpu, struct pt_regs 
*regs)
        disable_local_APIC();
 }
 
-static void kdump_nmi_shootdown_cpus(void)
+/* Please see the comment on the weak version in kernel/panic.c */
+void panic_smp_stop_cpus(void)
 {
+       static int cpus_stopped;
+
        in_crash_kexec = 1;
-       nmi_shootdown_cpus(kdump_nmi_callback);
 
-       disable_local_APIC();
+       if (!cpus_stopped) {
+               nmi_shootdown_cpus(kdump_nmi_callback);
+               disable_local_APIC();
+               cpus_stopped = 1;
+       }
 }
 
 #else
-static void kdump_nmi_shootdown_cpus(void)
+void panic_smp_stop_cpus(void)
 {
        /* There are no cpus to shootdown */
 }
@@ -158,7 +164,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
        /* The kernel is broken so disable interrupts */
        local_irq_disable();
 
-       kdump_nmi_shootdown_cpus();
+       panic_smp_stop_cpus();
 
        /*
         * VMCLEAR VMCSs loaded on this cpu if needed.
diff --git a/kernel/panic.c b/kernel/panic.c
index 04e91ff..a507637 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -60,6 +60,28 @@ void __weak panic_smp_self_stop(void)
                cpu_relax();
 }
 
+/*
+ * Stop other cpus in panic.  Architecture code may override this to
+ * with more suitable version.  Moreover, if the architecture supports
+ * crash dump, it should also save the states of stopped cpus.
+ *
+ * This function should be called only once.
+ */
+void __weak panic_smp_stop_cpus(void)
+{
+       static int cpus_stopped;
+
+       if (!cpus_stopped) {
+               /*
+                * Note smp_send_stop is the usual smp shutdown function,
+                * which unfortunately means it may not be hardened to
+                * work in a panic situation.
+                */
+               smp_send_stop();
+               cpus_stopped = 1;
+       }
+}
+
 /**
  *     panic - halt the system
  *     @fmt: The text string to print
@@ -120,12 +142,7 @@ void panic(const char *fmt, ...)
        if (!crash_kexec_post_notifiers)
                crash_kexec(NULL);
 
-       /*
-        * Note smp_send_stop is the usual smp shutdown function, which
-        * unfortunately means it may not be hardened to work in a panic
-        * situation.
-        */
-       smp_send_stop();
+       panic_smp_stop_cpus();
 
        /*
         * Run any panic handlers, including those that might need to


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to