Fernando Luis Vázquez Cao <[email protected]> writes:

> Subject: [PATCH] boot: ignore early NMIs
>
> From: Fernando Luis Vazquez Cao <[email protected]>
>
> NMIs very early in the boot process are rarely critical (usually
> it just means that there was a spurious bit flip somewhere in the
> hardware, or that this is a kdump kernel and we received an NMI
> generated in the previous context), so the current behavior of
> halting the system when one occurs is probably a bit over the top.
>
> This patch changes the early IDT so that NMIs are ignored and the
> kernel can, hopefully, continue executing other code. Harsher
> measures (panic, etc) are deferred to the final NMI handler, which
> can actually make an informed decision.
>
> This issue presented itself in our environment as seemingly
> random hangs in kdump.
>
> Signed-off-by: Fernando Luis Vazquez Cao <[email protected]>
> ---
>
> diff -urNp linux-3.3-rc6-orig/arch/x86/kernel/head64.c linux-3.3-rc6/arch/x86/kernel/head64.c
> --- linux-3.3-rc6-orig/arch/x86/kernel/head64.c       2012-03-07 15:49:01.834241787 +0900
> +++ linux-3.3-rc6/arch/x86/kernel/head64.c    2012-03-07 18:39:03.173732875 +0900
> @@ -71,7 +71,7 @@ void __init x86_64_start_kernel(char * r
>                               (__START_KERNEL & PGDIR_MASK)));
>       BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
>  
> -     /* clear bss before set_intr_gate with early_idt_handler */
> +     /* clear bss before set_intr_gate with early_idt_handlers */
>       clear_bss();
>  
>       /* Make NULL pointers segfault */
> @@ -79,13 +79,8 @@ void __init x86_64_start_kernel(char * r
>  
>       max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
>  
> -     for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
> -#ifdef CONFIG_EARLY_PRINTK
> +     for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
>               set_intr_gate(i, &early_idt_handlers[i]);
> -#else
> -             set_intr_gate(i, early_idt_handler);
> -#endif
> -     }
>       load_idt((const struct desc_ptr *)&idt_descr);
>  
>       if (console_loglevel == 10)
> diff -urNp linux-3.3-rc6-orig/arch/x86/kernel/head_64.S linux-3.3-rc6/arch/x86/kernel/head_64.S
> --- linux-3.3-rc6-orig/arch/x86/kernel/head_64.S      2012-03-07 15:49:01.838241839 +0900
> +++ linux-3.3-rc6/arch/x86/kernel/head_64.S   2012-03-07 18:41:21.811516876 +0900
> @@ -270,18 +270,29 @@ bad_address:
>       jmp bad_address
>  
>       .section ".init.text","ax"
> -#ifdef CONFIG_EARLY_PRINTK
>       .globl early_idt_handlers
>  early_idt_handlers:
> -     i = 0
> +     vector = 0
>       .rept NUM_EXCEPTION_VECTORS
> -     movl $i, %esi
> -     jmp early_idt_handler
> -     i = i + 1
> +     /*
> +      * NMIs (vector 2) this early in the boot process are rarely critical
> +      * (usually it just means that there was a spurious bit flip somewhere
> +      * in the hardware, or that this is a kdump kernel and we received an
> +      * NMI generated in the previous context), so we ignore them here and
> +      * try to continue (see early_nmi_handler implementation below).
> +      * Harsher measures (panic, etc) are deferred to the final NMI handler,
> +      * which can actually make an informed decision.
> +      */
> +     .if vector == 2
> +     jmp early_nmi_handler

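Is just a jump, rather than a move followed by a jump, still 10 bytes?
I hate to say it, but I think this fails miserably for any exception
vector after the NMI entry: the stubs are indexed as an array with a
fixed 10-byte stride, so a shorter NMI stub misplaces every later entry.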

I expect the simplest solution is to modify early_idt_handler to test
for vector == 2.

Doing something less brittle than:
> extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
in segment.h might be a good idea as well.

Eric

> +     .else
> +     movl $vector, %esi
> +     jmp early_exception_handler
> +     .endif
> +     vector = vector + 1
>       .endr
> -#endif
>  
> -ENTRY(early_idt_handler)
> +early_exception_handler:
>  #ifdef CONFIG_EARLY_PRINTK
>       cmpl $2,early_recursion_flag(%rip)
>       jz  1f
> @@ -315,6 +326,9 @@ ENTRY(early_idt_handler)
>  1:   hlt
>       jmp 1b
>  
> +early_nmi_handler:
> +     iretq
> +
>  #ifdef CONFIG_EARLY_PRINTK
>  early_recursion_flag:
>       .long 0

_______________________________________________
kexec mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to