The macro is largish, and we have 24 instances of it. By moving almost its entire body into a function, 1500..3000 bytes are saved (depending on .config):
text data bss dec hex filename 12546 0 0 12546 3102 entry_64.o.before 9394 0 0 9394 24b2 entry_64.o Run-tested. Signed-off-by: Denys Vlasenko <[email protected]> CC: Linus Torvalds <[email protected]> CC: Steven Rostedt <[email protected]> CC: Ingo Molnar <[email protected]> CC: Borislav Petkov <[email protected]> CC: "H. Peter Anvin" <[email protected]> CC: Andy Lutomirski <[email protected]> CC: Oleg Nesterov <[email protected]> CC: Frederic Weisbecker <[email protected]> CC: Alexei Starovoitov <[email protected]> CC: Will Drewry <[email protected]> CC: Kees Cook <[email protected]> CC: [email protected] CC: [email protected] --- arch/x86/kernel/entry_64.S | 130 +++++++++++++++++++++++++-------------------- 1 file changed, 71 insertions(+), 59 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b00ca22..1d33816 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -606,6 +606,71 @@ ENTRY(ret_from_fork) CFI_ENDPROC END(ret_from_fork) + +/* + * Common code for all interrupt entry points: + * save C-clobbered registers, maybe swapgs, maybe switch stacks. + * On return, %rdi points to pt_regs, old %rsp is on stack. + */ +ENTRY(interrupt_entry) + /* + * Since nothing in interrupt handling code touches r12...r15 members + * of "struct pt_regs", and since interrupts can nest, we can save + * four stack slots and simultaneously provide + * an unwind-friendly stack layout by saving "truncated" pt_regs + * exactly up to rbp slot, without these members. + * rbx slot is not populated. rbp slot is, but not used for restore. 
+ */ + XCPT_FRAME 1 15*8-RBP + ASM_CLAC + cld + /* ALLOC_PT_GPREGS_ON_STACK -RBP -- must be done by caller */ + SAVE_C_REGS -RBP+8 + /* Store %rbp to 8(%rsp) for unwinder, not for saving it per se */ + SAVE_EXTRA_REGS_RBP -RBP+8 + + /* Caller expects us to return pointer to pt_regs in %rdi */ + leaq -RBP+8(%rsp), %rdi + + testb $3, CS-RBP+8(%rsp) + jz 1f + SWAPGS +1: + /* + * Optionally switch to interrupt stack. Push previous stack pointer. + * irq_count is used to check if a CPU is already on an interrupt stack + * or not. While this is essentially redundant with preempt_count, + * it is a little cheaper to use a separate per-CPU counter (short of + * moving irq_enter into assembly, which would be too much work) + */ + popq %rax /* get return address */ + movq %rsp, %rsi + incl PER_CPU_VAR(irq_count) + cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp + CFI_DEF_CFA_REGISTER rsi + + pushq %rsi + pushq %rax + ret + CFI_ENDPROC +END(interrupt_entry) +.macro save_regs_and_call_intr_handler func + ALLOC_PT_GPREGS_ON_STACK -RBP + call interrupt_entry + /* + * For debugger: + * "CFA (Current Frame Address) is the value on stack + offset" + */ + CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ + 0x77 /* DW_OP_breg7 (rsp) */, 0, \ + 0x06 /* DW_OP_deref */, \ + 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \ + 0x22 /* DW_OP_plus */ + /* We entered an interrupt context - irqs are off: */ + TRACE_IRQS_OFF + call \func /* expects %rdi -> pt_regs */ +.endm + /* * Build the entry stubs and pointer table with some assembler magic. * We pack 7 stubs into a single 32-byte chunk, which will fit in a @@ -685,69 +750,17 @@ END(interrupt) /* * Interrupt entry/exit. * - * Interrupt entry points save only callee clobbered registers in fast path. - * * Entry runs with interrupts off. + * + * The interrupt stubs push (~vector+0x80) onto the stack and + * then jump to common_interrupt. + * Interrupt entry points save only callee clobbered registers in fast path. 
*/ - -/* 0(%rsp): ~(interrupt number) */ - .macro interrupt func - cld - /* - * Since nothing in interrupt handling code touches r12...r15 members - * of "struct pt_regs", and since interrupts can nest, we can save - * four stack slots and simultaneously provide - * an unwind-friendly stack layout by saving "truncated" pt_regs - * exactly up to rbp slot, without these members. - */ - ALLOC_PT_GPREGS_ON_STACK -RBP - SAVE_C_REGS -RBP - /* this goes to 0(%rsp) for unwinder, not for saving the value: */ - SAVE_EXTRA_REGS_RBP -RBP - - leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ - - testl $3, CS-RBP(%rsp) - je 1f - SWAPGS -1: - /* - * Save previous stack pointer, optionally switch to interrupt stack. - * irq_count is used to check if a CPU is already on an interrupt stack - * or not. While this is essentially redundant with preempt_count it is - * a little cheaper to use a separate counter in the PDA (short of - * moving irq_enter into assembly, which would be too much work) - */ - movq %rsp, %rsi - incl PER_CPU_VAR(irq_count) - cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - CFI_DEF_CFA_REGISTER rsi - pushq %rsi - /* - * For debugger: - * "CFA (Current Frame Address) is the value on stack + offset" - */ - CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ - 0x77 /* DW_OP_breg7 (rsp) */, 0, \ - 0x06 /* DW_OP_deref */, \ - 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \ - 0x22 /* DW_OP_plus */ - /* We entered an interrupt context - irqs are off: */ - TRACE_IRQS_OFF - - call \func - .endm - - /* - * The interrupt stubs push (~vector+0x80) onto the stack and - * then jump to common_interrupt. 
- */ .align ALIGN_common_interrupt common_interrupt: XCPT_FRAME - ASM_CLAC addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ - interrupt do_IRQ + save_regs_and_call_intr_handler do_IRQ /* 0(%rsp): old RSP */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) @@ -902,10 +915,9 @@ END(common_interrupt) .macro apicinterrupt3 num sym do_sym ENTRY(\sym) INTR_FRAME - ASM_CLAC pushq_cfi $~(\num) .Lcommon_\sym: - interrupt \do_sym + save_regs_and_call_intr_handler \do_sym jmp ret_from_intr CFI_ENDPROC END(\sym) -- 1.8.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/

