On Thu, Jun 23, 2016 at 01:31:32PM -0500, Josh Poimboeuf wrote:
> On Thu, Jun 23, 2016 at 09:35:29AM -0700, Andy Lutomirski wrote:
> > > So which is the least-bad option?  To summarize:
> > >
> > >   1) task flag(s) for preemption and page faults
> > >
> > >   2) turn pt_regs into a stack frame
> > >
> > >   3) annotate all calls from entry code in a table
> > >
> > >   4) encode rbp on entry
> > >
> > > They all have their issues, though I'm partial to #2.
> > >
> > > Any more hare-brained ideas? :-)
> > 
> > I'll try to take a closer look at #2 and see just how much I dislike
> > all the stack frame munging.
> 
> Ok.
> 
> > Also, in principle, it's only the
> > sleeping calls and the calls that make it into real (non-entry) kernel
> > code that really want to be unwindable through this mechanism.
> 
> Yeah, that's true.  We could modify options 2 or 3 to be less absolute.
> Though I think that makes them more prone to future breakage.
> 
> > FWIW, I don't care that much about preserving gdb's partial ability to
> > unwind through pt_regs, especially because gdb really ought to be able
> > to use DWARF, too.
> 
> Hm, that's a good point.  I really don't know if there are any other
> external tools out there that would care.  Maybe we could try option 4
> and then see if anybody complains.

I'm starting to think hare-brained option 4 is the way to go.  Any
external tooling should really be relying on DWARF anyway.

Here's a sneak preview.  If this general approach looks ok to you, I'll
go ahead and port all the in-tree unwinders and post a proper patch.

Instead of using xor -1 on the pt_regs pointer, I just cleared the
high-order bit.  That makes the unwinding experience much more pleasant
for a human stack walker, and also ensures that anybody trying to
dereference it gets slapped with an oops, at least in the 48-bit address
space era.

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9a9e588..bf397426 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -201,6 +201,23 @@ For 32-bit we have the following conventions - kernel is 
built with
        .byte 0xf1
        .endm
 
+       /*
+        * This is a sneaky trick to help the unwinder find pt_regs on the
+        * stack.  The frame pointer is replaced with an encoded pointer to
+        * pt_regs.  The encoding is just a clearing of the highest-order bit,
+        * which makes it an invalid address and is also a signal to the
+        * unwinder that it's a pt_regs pointer in disguise.
+        *
+        * NOTE: This must be called *after* SAVE_EXTRA_REGS because it
+        * corrupts rbp.
+        */
+       .macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+       leaq \ptregs_offset(%rsp), %rbp
+       btr $63, %rbp
+#endif
+       .endm
+
 #endif /* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9ee0da1..eb79652 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -431,6 +431,7 @@ END(irq_entries_start)
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
+       ENCODE_FRAME_POINTER
 
        testb   $3, CS(%rsp)
        jz      1f
@@ -893,6 +894,7 @@ ENTRY(xen_failsafe_callback)
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
+       ENCODE_FRAME_POINTER
        jmp     error_exit
 END(xen_failsafe_callback)
 
@@ -936,6 +938,7 @@ ENTRY(paranoid_entry)
        cld
        SAVE_C_REGS 8
        SAVE_EXTRA_REGS 8
+       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@ -983,6 +986,7 @@ ENTRY(error_entry)
        cld
        SAVE_C_REGS 8
        SAVE_EXTRA_REGS 8
+       ENCODE_FRAME_POINTER 8
        xorl    %ebx, %ebx
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
@@ -1165,6 +1169,7 @@ ENTRY(nmi)
        pushq   %r13            /* pt_regs->r13 */
        pushq   %r14            /* pt_regs->r14 */
        pushq   %r15            /* pt_regs->r15 */
+       ENCODE_FRAME_POINTER
 
        /*
         * At this point we no longer need to worry about stack damage
@@ -1182,7 +1187,7 @@ ENTRY(nmi)
         * do_nmi doesn't modify pt_regs.
         */
        SWAPGS
-       jmp     restore_c_regs_and_iret
+       jmp     restore_regs_and_iret
 
 .Lnmi_from_kernel:
        /*
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to