On Wed, Feb 14, 2018 at 2:06 PM, Dominik Brodowski
<li...@dominikbrodowski.net> wrote:
> On Wed, Feb 14, 2018 at 01:57:15PM -0500, Brian Gerst wrote:
>> On Wed, Feb 14, 2018 at 1:21 PM, Dominik Brodowski
>> <li...@dominikbrodowski.net> wrote:
>> > We can also move the SWAPGS and the switch_to_thread_stack to the
>> > interrupt helper function. As we do not want call depths of two,
>> > convert switch_to_thread_stack to a macro. However, as entry_64_compat.S
>> > expects switch_to_thread_stack to be a function, provide a wrapper for
>> > that, which leads to some code duplication if CONFIG_IA32_EMULATION is
>> > enabled. Therefore, the size reduction differs slightly:
>> >
>> > With CONFIG_IA32_EMULATION enabled (-0.13k):
>> >    text    data     bss     dec     hex filename
>> >   16897       0       0   16897    4201 entry_64.o-orig
>> >   16767       0       0   16767    417f entry_64.o
>> >
>> > With CONFIG_IA32_EMULATION disabled (-0.27k):
>> >    text    data     bss     dec     hex filename
>> >   16897       0       0   16897    4201 entry_64.o-orig
>> >   16622       0       0   16622    40ee entry_64.o
>> >
>> > Signed-off-by: Dominik Brodowski <li...@dominikbrodowski.net>
>> > ---
>> >  arch/x86/entry/entry_64.S | 65 ++++++++++++++++++++++++++---------------------
>> >  1 file changed, 36 insertions(+), 29 deletions(-)
>> >
>> > diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> > index 3046b12a1acb..b60a3b692ca9 100644
>> > --- a/arch/x86/entry/entry_64.S
>> > +++ b/arch/x86/entry/entry_64.S
>> > @@ -536,6 +536,31 @@ END(irq_entries_start)
>> >         decl    PER_CPU_VAR(irq_count)
>> >  .endm
>> >
>> > +/*
>> > + * Switch to the thread stack.  This is called with the IRET frame and
>> > + * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
>> > + * space has not been allocated for them.)
>> > + */
>> > +.macro DO_SWITCH_TO_THREAD_STACK
>> > +       pushq   %rdi
>> > +       /* Need to switch before accessing the thread stack. */
>> > +       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
>> > +       movq    %rsp, %rdi
>> > +       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>> > +       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
>> > +
>> > +       pushq   7*8(%rdi)               /* regs->ss */
>> > +       pushq   6*8(%rdi)               /* regs->rsp */
>> > +       pushq   5*8(%rdi)               /* regs->eflags */
>> > +       pushq   4*8(%rdi)               /* regs->cs */
>> > +       pushq   3*8(%rdi)               /* regs->ip */
>> > +       pushq   2*8(%rdi)               /* regs->orig_ax */
>> > +       pushq   8(%rdi)                 /* return address */
>> > +       UNWIND_HINT_FUNC
>> > +
>> > +       movq    (%rdi), %rdi
>> > +.endm
>> > +
>> >  /*
>> >   * Interrupt entry/exit.
>> >   *
>> > @@ -543,10 +568,17 @@ END(irq_entries_start)
>> >   *
>> >   * Entry runs with interrupts off.
>> >   */
>> > +/* 8(%rsp): ~(interrupt number) */
>> >  ENTRY(interrupt_helper)
>> >         UNWIND_HINT_FUNC
>> >         cld
>> >
>> > +       testb   $3, CS-ORIG_RAX+8(%rsp)
>> > +       jz      1f
>> > +       SWAPGS
>> > +       DO_SWITCH_TO_THREAD_STACK
>> > +1:
>> > +
>> >         PUSH_AND_CLEAR_REGS save_ret=1
>> >         ENCODE_FRAME_POINTER 8
>> >
>> > @@ -579,12 +611,6 @@ END(interrupt_helper)
>> >         .macro interrupt func
>> >         cld
>> >
>> > -       testb   $3, CS-ORIG_RAX(%rsp)
>> > -       jz      1f
>> > -       SWAPGS
>> > -       call    switch_to_thread_stack
>> > -1:
>> > -
>> >         call    interrupt_helper
>> >
>> >         call    \func   /* rdi points to pt_regs */
>> > @@ -853,33 +879,14 @@ apicinterrupt IRQ_WORK_VECTOR                     irq_work_interrupt              smp_irq_work_interrupt
>> >   */
>> >  #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
>> >
>> > -/*
>> > - * Switch to the thread stack.  This is called with the IRET frame and
>> > - * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
>> > - * space has not been allocated for them.)
>> > - */
>> > +#if defined(CONFIG_IA32_EMULATION)
>> > +/* entry_64_compat.S::entry_INT80_compat expects this to be an ASM function */
>> >  ENTRY(switch_to_thread_stack)
>> >         UNWIND_HINT_FUNC
>> > -
>> > -       pushq   %rdi
>> > -       /* Need to switch before accessing the thread stack. */
>> > -       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
>> > -       movq    %rsp, %rdi
>> > -       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
>> > -       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
>> > -
>> > -       pushq   7*8(%rdi)               /* regs->ss */
>> > -       pushq   6*8(%rdi)               /* regs->rsp */
>> > -       pushq   5*8(%rdi)               /* regs->eflags */
>> > -       pushq   4*8(%rdi)               /* regs->cs */
>> > -       pushq   3*8(%rdi)               /* regs->ip */
>> > -       pushq   2*8(%rdi)               /* regs->orig_ax */
>> > -       pushq   8(%rdi)                 /* return address */
>> > -       UNWIND_HINT_FUNC
>> > -
>> > -       movq    (%rdi), %rdi
>> > +       DO_SWITCH_TO_THREAD_STACK
>> >         ret
>> >  END(switch_to_thread_stack)
>> > +#endif
>> >
>> >  .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
>> >  ENTRY(\sym)
>> > --
>> > 2.16.1
>> >
>>
>> Move the macro to calling.h, and inline it into the compat entry.
>
> That certainly sounds possible, but it makes the macro more complex: inlining
> means that the offsets need to be reduced by 8, yet we still need the current
> offsets for the call from interrupt_helper. So such a change might make the
> code less readable.
>
> Thanks,
>         Dominik

It would probably be better to just open-code it in that case, without the
return address handling.

--
Brian Gerst

Reply via email to