Nicely done, Petr -- I really appreciate this fix.  Queued for crash-7.0.7:

  
https://github.com/crash-utility/crash/commit/8e15958e1b7183bbfbdf004f0ad8f2b62f023f9f

Thanks,
  Dave

----- Original Message -----
> Kernel commit 3f3c8b8c4b2a34776c3470142a7c8baafcda6eb0 changed the NMI stack
> layout, adding 12 more values on the stack. The fix has two parts:
> 
>   1. Determine if this kernel has the nested NMI layout and set a
>      machine-specific flag (NESTED_NMI) if it does.
> 
>   2. When backtracing an NMI stack, use the saved values instead of those
>      found at the top of stack.
> 
> Additionally, kernel commit 28696f434fef0efa97534b59986ad33b9c4df7f8 changed
> the stack layout again, swapping the location of the "saved" and "copied"
> registers. This can be detected automatically, because the "copied" registers
> contain either a copy of the "saved" registers, or point to repeat_nmi. So,
> if repeat_nmi is found as the return address, assume that this is the old
> layout and adjust the stack pointer again.
> 
> Without the patch, wrong register values are shown in the NMI backtrace.
> 
> Signed-off-by: Petr Tesarik <[email protected]>
> ---
>  defs.h   |  1 +
>  x86_64.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++---------------
>  2 files changed, 57 insertions(+), 17 deletions(-)
> 
> diff --git a/defs.h b/defs.h
> index 711b154..4054de4 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -5123,6 +5123,7 @@ struct machine_specific {
>  #define VM_XEN_RHEL4 (0x100)
>  #define FRAMEPOINTER (0x200)
>  #define GART_REGION  (0x400)
> +#define NESTED_NMI   (0x800)
>  
>  #define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4)
>  
> diff --git a/x86_64.c b/x86_64.c
> index 5364c30..fd384ac 100644
> --- a/x86_64.c
> +++ b/x86_64.c
> @@ -468,6 +468,8 @@ x86_64_init(int when)
>               else
>                       x86_64_per_cpu_init();
>               x86_64_ist_init();
> +             if (symbol_exists("repeat_nmi"))
> +                     machdep->flags |= NESTED_NMI;
>               machdep->in_alternate_stack = x86_64_in_alternate_stack;
>                  if ((machdep->machspec->irqstack = (char *)
>                   malloc(machdep->machspec->stkinfo.isize)) == NULL)
> @@ -3009,6 +3011,8 @@ in_exception_stack:
>                  }
>  
>               stacktop = bt->stacktop - SIZE(pt_regs);
> +             if ((machdep->flags & NESTED_NMI) && estack_index == NMI_STACK)
> +                     stacktop -= 12*sizeof(ulong);
>  
>               bt->flags &= ~BT_FRAMESIZE_DISABLE;
>  
> @@ -3046,21 +3050,37 @@ in_exception_stack:
>               }
>  
>                  cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0,
> -                     bt->stackbuf + (bt->stacktop - bt->stackbase) -
> -                     SIZE(pt_regs), bt, ofp);
> +                     bt->stackbuf + (stacktop - bt->stackbase),
> +                     bt, ofp);
>  
>               if (!BT_REFERENCE_CHECK(bt))
>                       fprintf(fp, "--- <%s exception stack> ---\n",
>                               x86_64_exception_stacks[estack_index]);
>  
> -                /*
> -              *  stack = (unsigned long *) estack_end[-2];
> +             /*
> +              * Find the CPU-saved, or handler-saved registers
>                */
>               up = (ulong *)(&bt->stackbuf[bt->stacktop - bt->stackbase]);
> -             up -= 2;
> -             rsp = bt->stkptr = *up;
> -             up -= 3;
> -             bt->instptr = *up;
> +             up -= 5;
> +             if ((machdep->flags & NESTED_NMI) &&
> +                 estack_index == NMI_STACK &&
> +                 bt->stkptr <= bt->stacktop - 17*sizeof(ulong)) {
> +                     up -= 12;
> +                     /* Copied and saved regs are swapped in pre-3.8 kernels */
> +                     if (*up == symbol_value("repeat_nmi"))
> +                             up += 5;
> +             }
> +
> +             /* Registers (as saved by CPU):
> +              *
> +              *   up[4]      SS
> +              *   up[3]      RSP
> +              *   up[2]      RFLAGS
> +              *   up[1]      CS
> +              *   up[0]      RIP
> +              */
> +             rsp = bt->stkptr = up[3];
> +             bt->instptr = up[0];
>               if (cs & 3)
>                       done = TRUE;   /* user-mode exception */
>               else
> @@ -3513,27 +3533,46 @@ in_exception_stack:
>                  }
>  
>               stacktop = bt->stacktop - SIZE(pt_regs);
> -
> +             if ((machdep->flags & NESTED_NMI) &&
> +                 estack_index == NMI_STACK)
> +                     stacktop -= 12*sizeof(ulong);
> +
>               if (!done) {
>                       level = dwarf_backtrace(bt, level, stacktop);
>                       done = TRUE;
>               }
>  
>                  cs = x86_64_exception_frame(EFRAME_PRINT|EFRAME_CS, 0,
> -                     bt->stackbuf + (bt->stacktop - bt->stackbase) -
> -                     SIZE(pt_regs), bt, ofp);
> +                     bt->stackbuf + (stacktop - bt->stackbase),
> +                     bt, ofp);
>  
>               if (!BT_REFERENCE_CHECK(bt))
>                       fprintf(fp, "--- <exception stack> ---\n");
>  
> -                /*
> -              *  stack = (unsigned long *) estack_end[-2];
> +             /*
> +              * Find the CPU-saved, or handler-saved registers
>                */
>               up = (ulong *)(&bt->stackbuf[bt->stacktop - bt->stackbase]);
> -             up -= 2;
> -             rsp = bt->stkptr = *up;
> -             up -= 3;
> -             bt->instptr = *up;
> +             up -= 5;
> +             if ((machdep->flags & NESTED_NMI) &&
> +                 estack_index == NMI_STACK &&
> +                 bt->stkptr <= bt->stacktop - 17*sizeof(ulong)) {
> +                     up -= 12;
> +                     /* Copied and saved regs are swapped in pre-3.8 kernels */
> +                     if (*up == symbol_value("repeat_nmi"))
> +                             up += 5;
> +             }
> +
> +             /* Registers (as saved by CPU):
> +              *
> +              *   up[4]      SS
> +              *   up[3]      RSP
> +              *   up[2]      RFLAGS
> +              *   up[1]      CS
> +              *   up[0]      RIP
> +              */
> +             rsp = bt->stkptr = up[3];
> +             bt->instptr = up[0];
>               if (cs & 3)
>                       done = TRUE;   /* user-mode exception */
>               else
> --
> 1.8.4.5
> 

--
Crash-utility mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/crash-utility

Reply via email to