When kexec goes to issue an NMI it uses set_nmi_callback() to have the
other CPUs execute the proper shutdown code. Unfortunately, under certain
situations set_nmi_callback() will fail (e.g., oprofile has it reserved
already). This will cause kexec/kdump to hang and do nothing. :(
After talking to Andi, he mentioned that subsystems should be using the
notifier callback on the die chain instead. The included patch
incorporates that. The priority is set to 0, hopefully causing the
notifier to be the first one called.
However, after talking to Vivek about this, he mentioned that he could
still envision conditions (e.g., the die_chain is corrupted) where even this
procedure might not work.
I believe using the notifier is safer for now. Plus I am working on a
patch that removes the set_nmi_callback()/unset_nmi_callback() (hence my
push for this patch), so I would like to have this patch go in. :)
Vivek also mentioned some other work at replacing the NMI stack
completely, which would make this patch moot. But I don't know the state
of it.
If you have any comments, questions, or feedback, let me know.
Cheers,
Don
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 2b0cfce..f078c84 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -23,6 +23,7 @@ #include <asm/nmi.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <mach_ipi.h>
+#include <asm/kdebug.h>
/* This keeps a track of which one is crashing cpu. */
@@ -93,31 +94,46 @@ static void crash_save_self(struct pt_re
#ifdef CONFIG_SMP
static atomic_t waiting_for_crash_ipi;
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+int crash_nmi_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
- struct pt_regs fixed_regs;
-
- /* Don't do anything if this handler is invoked on crashing cpu.
- * Otherwise, system will completely hang. Crashing cpu can get
- * an NMI if system was initially booted with nmi_watchdog parameter.
- */
- if (cpu == crashing_cpu)
- return 1;
- local_irq_disable();
-
- if (!user_mode_vm(regs)) {
- crash_fixup_ss_esp(&fixed_regs, regs);
- regs = &fixed_regs;
+ struct die_args *args = (struct die_args *)data;
+ int cpu = smp_processor_id();
+ int ret = NOTIFY_DONE;
+
+ switch(val) {
+ case DIE_NMI_IPI:
+ /*
+ * Don't do anything if this handler is invoked on crashing cpu.
+ * Otherwise, system will completely hang. Crashing cpu can get
+ * an NMI if system was initially booted with nmi_watchdog parameter.
+ */
+ if (cpu == crashing_cpu)
+ return NOTIFY_STOP;
+ local_irq_disable();
+
+ if (!user_mode_vm(regs)) {
+ crash_fixup_ss_esp(&fixed_regs, regs);
+ regs = &fixed_regs;
+ }
+ crash_save_this_cpu(args->regs, cpu);
+ disable_local_APIC();
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+ halt();
+ for(;;);
+ break;
+ default:
+ break;
}
- crash_save_this_cpu(regs, cpu);
- disable_local_APIC();
- atomic_dec(&waiting_for_crash_ipi);
- /* Assume hlt works */
- halt();
- for(;;);
-
- return 1;
+ return ret;
}
+
+static struct notifier_block crash_nmi_exceptions_nb = {
+ .notifier_call = crash_nmi_exceptions_notify,
+ .next = NULL,
+ .priority = 0
+};
/*
* By using the NMI code instead of a vector we just sneak thru the
@@ -126,7 +142,7 @@ static int crash_nmi_callback(struct pt_
*/
static void smp_send_nmi_allbutself(void)
{
- send_IPI_allbutself(APIC_DM_NMI);
+ send_IPI_allbutself(DIE_NMI_IPI);
}
static void nmi_shootdown_cpus(void)
@@ -135,7 +151,7 @@ static void nmi_shootdown_cpus(void)
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
/* Would it be better to replace the trap vector here? */
- set_nmi_callback(crash_nmi_callback);
+ register_die_notifier(&crash_nmi_exceptions_nb);
/* Ensure the new callback function is set before sending
* out the NMI
*/
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c
index 4e6c3b7..ed23a7d 100644
--- a/arch/x86_64/kernel/crash.c
+++ b/arch/x86_64/kernel/crash.c
@@ -23,6 +23,7 @@ #include <asm/hardirq.h>
#include <asm/nmi.h>
#include <asm/hw_irq.h>
#include <asm/mach_apic.h>
+#include <asm/kdebug.h>
/* This keeps a track of which one is crashing cpu. */
static int crashing_cpu;
@@ -95,30 +96,46 @@ static void crash_save_self(struct pt_re
#ifdef CONFIG_SMP
static atomic_t waiting_for_crash_ipi;
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+int crash_nmi_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
- /*
- * Don't do anything if this handler is invoked on crashing cpu.
- * Otherwise, system will completely hang. Crashing cpu can get
- * an NMI if system was initially booted with nmi_watchdog parameter.
- */
- if (cpu == crashing_cpu)
- return 1;
- local_irq_disable();
-
- crash_save_this_cpu(regs, cpu);
- disable_local_APIC();
- atomic_dec(&waiting_for_crash_ipi);
- /* Assume hlt works */
- for(;;)
- asm("hlt");
-
- return 1;
+ struct die_args *args = (struct die_args *)data;
+ int cpu = smp_processor_id();
+ int ret = NOTIFY_DONE;
+
+ switch(val) {
+ case DIE_NMI_IPI:
+ /*
+ * Don't do anything if this handler is invoked on crashing cpu.
+ * Otherwise, system will completely hang. Crashing cpu can get
+ * an NMI if system was initially booted with nmi_watchdog parameter.
+ */
+ if (cpu == crashing_cpu)
+ return NOTIFY_STOP;
+ local_irq_disable();
+
+ crash_save_this_cpu(args->regs, cpu);
+ disable_local_APIC();
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+ for(;;)
+ asm("hlt");
+ break;
+ default:
+ break;
+ }
+ return ret;
}
+
+static struct notifier_block crash_nmi_exceptions_nb = {
+ .notifier_call = crash_nmi_exceptions_notify,
+ .next = NULL,
+ .priority = 0
+};
static void smp_send_nmi_allbutself(void)
{
- send_IPI_allbutself(APIC_DM_NMI);
+ send_IPI_allbutself(DIE_NMI_IPI);
}
/*
@@ -132,7 +149,7 @@ static void nmi_shootdown_cpus(void)
unsigned long msecs;
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- set_nmi_callback(crash_nmi_callback);
+ register_die_notifier(&crash_nmi_exceptions_nb);
/*
* Ensure the new callback function is set before sending
_______________________________________________
fastboot mailing list
[email protected]
https://lists.osdl.org/mailman/listinfo/fastboot