PANIC_MCE: Introduce a new panic flag for fatal MCE, capture related information

Fatal machine check exceptions (caused by hardware memory errors) will now
result in a 'slim' coredump that captures vital information about the MCE. This
patch introduces a new panic flag, PANIC_MCE, and new parameters to the *panic
functions (xpanic/vpanic) that carry additional information about the cause of
the crash.

Add a new ELF note type (NT_MCE) to store additional information about the
crash. For MCE, the note captures the relevant register state (struct mce) so
that it can be read later during coredump analysis.

Signed-off-by: K.Prasad <[email protected]>
---
 arch/arm/kernel/traps.c          |    2 +-
 arch/powerpc/kernel/traps.c      |    2 +-
 arch/sh/kernel/traps_32.c        |    2 +-
 arch/x86/kernel/cpu/mcheck/mce.c |    7 +++----
 arch/x86/kernel/dumpstack.c      |    2 +-
 include/linux/elf.h              |    5 +++++
 include/linux/kernel.h           |    9 +++++----
 include/linux/kexec.h            |    9 ++++++---
 kernel/kexec.c                   |   17 +++++++++++------
 kernel/panic.c                   |   16 ++++++++--------
 10 files changed, 42 insertions(+), 29 deletions(-)

Index: linux-2.6.slim_kdump/include/linux/kernel.h
===================================================================
--- linux-2.6.slim_kdump.orig/include/linux/kernel.h
+++ linux-2.6.slim_kdump/include/linux/kernel.h
@@ -178,17 +178,18 @@ static inline void might_fault(void)
 enum panic_flags {
        PANIC_NO_KEXEC     = (1 << 0),
        PANIC_NO_BACKTRACE = (1 << 1),
+       PANIC_MCE          = (1 << 2),
 };
 
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 NORET_TYPE void panic(const char * fmt, ...)
        __attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
-NORET_TYPE void xpanic(enum panic_flags flags, int timeout,
-                      const char *fmt, ...)
-       __attribute__ ((NORET_AND format (printf, 3, 4))) __cold;
+NORET_TYPE void xpanic(enum panic_flags flags, int timeout, void *arch_info,
+                       size_t arch_info_size, const char *fmt, ...)
+       __attribute__ ((NORET_AND format (printf, 5, 6))) __cold;
 NORET_TYPE void vpanic(enum panic_flags flags, int timeout,
-                      const char *fmt,
+                      void *arch_info, size_t arch_info_size, const char *fmt,
                       va_list ap) __noreturn __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
Index: linux-2.6.slim_kdump/kernel/panic.c
===================================================================
--- linux-2.6.slim_kdump.orig/kernel/panic.c
+++ linux-2.6.slim_kdump/kernel/panic.c
@@ -61,21 +61,21 @@ NORET_TYPE void panic(const char *fmt, .
 {
        va_list ap;
        va_start(ap, fmt);
-       vpanic(0, 0, fmt, ap);
+       vpanic(0, 0, NULL, 0, fmt, ap);
 }
 EXPORT_SYMBOL(panic);
 
-NORET_TYPE void xpanic(enum panic_flags flags, int timeout,
-                       const char *fmt, ...)
+NORET_TYPE void xpanic(enum panic_flags flags, int timeout, void *arch_info,
+                       size_t arch_info_size, const char *fmt, ...)
 {
        va_list ap;
        va_start(ap, fmt);
-       vpanic(flags, timeout, fmt, ap);
+       vpanic(flags, timeout, arch_info, arch_info_size, fmt, ap);
 }
 EXPORT_SYMBOL(xpanic);
 
-NORET_TYPE void vpanic(enum panic_flags flags, int timeout,
-                      const char * fmt, va_list args)
+NORET_TYPE void vpanic(enum panic_flags flags, int timeout, void *arch_info,
+                       size_t arch_info_size, const char * fmt, va_list args)
 {
        static char buf[1024];
        long i, i_next = 0;
@@ -99,7 +99,7 @@ NORET_TYPE void vpanic(enum panic_flags
        vsnprintf(buf, sizeof(buf), fmt, args);
        printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
 #ifdef CONFIG_DEBUG_BUGVERBOSE
-       if (!(flags & PANIC_NO_BACKTRACE))
+       if (!(flags & (PANIC_NO_BACKTRACE | PANIC_MCE)))
                dump_stack();
 #endif
 
@@ -109,7 +109,7 @@ NORET_TYPE void vpanic(enum panic_flags
         * Do we want to call this before we try to display a message?
         */
        if (!(flags & PANIC_NO_KEXEC))
-               crash_kexec(NULL);
+               crash_kexec(NULL, arch_info, arch_info_size, flags);
 
        kmsg_dump(KMSG_DUMP_PANIC);
 
Index: linux-2.6.slim_kdump/arch/x86/kernel/cpu/mcheck/mce.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/x86/kernel/cpu/mcheck/mce.c
+++ linux-2.6.slim_kdump/arch/x86/kernel/cpu/mcheck/mce.c
@@ -258,8 +258,7 @@ static void wait_for_panic(void)
        local_irq_enable();
        while (timeout-- > 0)
                udelay(1);
-       xpanic(PANIC_NO_KEXEC|PANIC_NO_BACKTRACE, 0,
-               "Panicing machine check CPU died");
+       xpanic(PANIC_MCE, 0, NULL, 0, "Panicing machine check CPU died");
 }
 
 static void mce_panic(char *msg, struct mce *final, char *exp)
@@ -315,8 +314,8 @@ static void mce_panic(char *msg, struct
        if (exp)
                pr_emerg(HW_ERR "Machine check: %s\n", exp);
        if (!fake_panic) {
-               xpanic(PANIC_NO_KEXEC|PANIC_NO_BACKTRACE, mce_panic_timeout,
-                       msg);
+               xpanic(PANIC_MCE, mce_panic_timeout, final,
+                       sizeof(struct mce), msg);
        } else
                pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
 }
Index: linux-2.6.slim_kdump/arch/arm/kernel/traps.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/arm/kernel/traps.c
+++ linux-2.6.slim_kdump/arch/arm/kernel/traps.c
@@ -274,7 +274,7 @@ void die(const char *str, struct pt_regs
        ret = __die(str, err, thread, regs);
 
        if (regs && kexec_should_crash(thread->task))
-               crash_kexec(regs);
+               crash_kexec(regs, NULL, 0, 0);
 
        bust_spinlocks(0);
        add_taint(TAINT_DIE);
Index: linux-2.6.slim_kdump/arch/powerpc/kernel/traps.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/powerpc/kernel/traps.c
+++ linux-2.6.slim_kdump/arch/powerpc/kernel/traps.c
@@ -161,7 +161,7 @@ int die(const char *str, struct pt_regs
 
        if (kexec_should_crash(current) ||
                kexec_sr_activated(smp_processor_id()))
-               crash_kexec(regs);
+               crash_kexec(regs, NULL, 0, 0);
        crash_kexec_secondary(regs);
 
        if (in_interrupt())
Index: linux-2.6.slim_kdump/arch/sh/kernel/traps_32.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/sh/kernel/traps_32.c
+++ linux-2.6.slim_kdump/arch/sh/kernel/traps_32.c
@@ -106,7 +106,7 @@ void die(const char * str, struct pt_reg
        oops_exit();
 
        if (kexec_should_crash(current))
-               crash_kexec(regs);
+               crash_kexec(regs, NULL, 0, 0);
 
        if (in_interrupt())
                panic("Fatal exception in interrupt");
Index: linux-2.6.slim_kdump/arch/x86/kernel/dumpstack.c
===================================================================
--- linux-2.6.slim_kdump.orig/arch/x86/kernel/dumpstack.c
+++ linux-2.6.slim_kdump/arch/x86/kernel/dumpstack.c
@@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(oops_begin);
 void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 {
        if (regs && kexec_should_crash(current))
-               crash_kexec(regs);
+               crash_kexec(regs, NULL, 0, 0);
 
        bust_spinlocks(0);
        die_owner = -1;
Index: linux-2.6.slim_kdump/include/linux/elf.h
===================================================================
--- linux-2.6.slim_kdump.orig/include/linux/elf.h
+++ linux-2.6.slim_kdump/include/linux/elf.h
@@ -381,6 +381,11 @@ typedef struct elf64_shdr {
 #define NT_PRPSINFO    3
 #define NT_TASKSTRUCT  4
 #define NT_AUXV                6
+/*
+ * Although numbers 1 - 6 have been defined here, the user-space include files
+ * have numbers 1 - 20 taken up. Hence defining NT_MCE as 21.
+ */
+#define NT_MCE         21              /* Machine Check Exception related data */
 #define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
 #define NT_PPC_VMX     0x100           /* PowerPC Altivec/VMX registers */
 #define NT_PPC_SPE     0x101           /* PowerPC SPE/EVR registers */
Index: linux-2.6.slim_kdump/include/linux/kexec.h
===================================================================
--- linux-2.6.slim_kdump.orig/include/linux/kexec.h
+++ linux-2.6.slim_kdump/include/linux/kexec.h
@@ -125,10 +125,12 @@ extern asmlinkage long compat_sys_kexec_
 #endif
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
                                                unsigned int order);
-extern void crash_kexec(struct pt_regs *);
+extern void crash_kexec(struct pt_regs *, void *arch_info,
+                       size_t arch_info_size, enum panic_flags);
 int kexec_should_crash(struct task_struct *);
 void crash_save_cpu(struct pt_regs *regs, int cpu);
-void crash_save_vmcoreinfo(void);
+void crash_save_vmcoreinfo(void *arch_info, size_t arch_info_size,
+                                               enum panic_flags);
 void arch_crash_save_vmcoreinfo(void);
 void vmcoreinfo_append_str(const char *fmt, ...)
        __attribute__ ((format (printf, 1, 2)));
@@ -213,7 +215,8 @@ void crash_free_reserved_phys_range(unsi
 #else /* !CONFIG_KEXEC */
 struct pt_regs;
 struct task_struct;
-static inline void crash_kexec(struct pt_regs *regs) { }
+static inline void crash_kexec(struct pt_regs *regs, void *arch_info,
+                               size_t arch_info_size, enum panic_flags flags) { }
 static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 #endif /* CONFIG_KEXEC */
 #endif /* LINUX_KEXEC_H */
Index: linux-2.6.slim_kdump/kernel/kexec.c
===================================================================
--- linux-2.6.slim_kdump.orig/kernel/kexec.c
+++ linux-2.6.slim_kdump/kernel/kexec.c
@@ -1065,7 +1065,8 @@ asmlinkage long compat_sys_kexec_load(un
 }
 #endif
 
-void crash_kexec(struct pt_regs *regs)
+void crash_kexec(struct pt_regs *regs, void *arch_info, size_t arch_info_size,
+                enum panic_flags flags)
 {
        /* Take the kexec_mutex here to prevent sys_kexec_load
         * running on one cpu from replacing the crash kernel
@@ -1082,7 +1083,7 @@ void crash_kexec(struct pt_regs *regs)
                        kmsg_dump(KMSG_DUMP_KEXEC);
 
                        crash_setup_regs(&fixed_regs, regs);
-                       crash_save_vmcoreinfo();
+                       crash_save_vmcoreinfo(arch_info, arch_info_size, flags);
                        machine_crash_shutdown(&fixed_regs);
                        machine_kexec(kexec_crash_image);
                }
@@ -1381,7 +1382,8 @@ int __init parse_crashkernel(char                  *cm
 
 
 
-void crash_save_vmcoreinfo(void)
+void crash_save_vmcoreinfo(void *arch_info, size_t arch_info_size,
+                                               enum panic_flags flags)
 {
        u32 *buf;
 
@@ -1392,9 +1394,12 @@ void crash_save_vmcoreinfo(void)
 
        buf = (u32 *)vmcoreinfo_note;
 
-       buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
-                             vmcoreinfo_size);
-
+       if (flags & PANIC_MCE)
+               buf = append_elf_note(buf, "PANIC_MCE", NT_MCE, arch_info,
+                                       arch_info_size);
+       else
+               buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0,
+                               vmcoreinfo_data, vmcoreinfo_size);
        final_note(buf);
 }
 

_______________________________________________
kexec mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to