The "interrupt" macro is largish, and we have 24 instances of it.

By moving almost its entire body into a function,
1500..3000 bytes are saved (depending on .config):

   text    data     bss     dec     hex filename
  12546       0       0   12546    3102 entry_64.o.before
   9394       0       0    9394    24b2 entry_64.o

Run-tested.

Signed-off-by: Denys Vlasenko <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Steven Rostedt <[email protected]>
CC: Ingo Molnar <[email protected]>
CC: Borislav Petkov <[email protected]>
CC: "H. Peter Anvin" <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Oleg Nesterov <[email protected]>
CC: Frederic Weisbecker <[email protected]>
CC: Alexei Starovoitov <[email protected]>
CC: Will Drewry <[email protected]>
CC: Kees Cook <[email protected]>
CC: [email protected]
CC: [email protected]
---
 arch/x86/kernel/entry_64.S | 130 +++++++++++++++++++++++++--------------------
 1 file changed, 71 insertions(+), 59 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b00ca22..1d33816 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -606,6 +606,71 @@ ENTRY(ret_from_fork)
        CFI_ENDPROC
 END(ret_from_fork)
 
+
+/*
+ * Common code for all interrupt entry points:
+ * save C-clobbered registers, maybe swapgs, maybe switch stacks.
+ * On return: %rdi -> pt_regs, old %rsp at 0(%rsp); %rax, %rsi clobbered.
+ */
+ENTRY(interrupt_entry)
+       /*
+        * Since nothing in interrupt handling code touches r12...r15 members
+        * of "struct pt_regs", and since interrupts can nest, we can save
+        * four stack slots and simultaneously provide
+        * an unwind-friendly stack layout by saving "truncated" pt_regs
+        * exactly up to rbp slot, without these members.
+        * rbx slot is not populated. rbp slot is, but not used for restore.
+        */
+       XCPT_FRAME 1 15*8-RBP
+       ASM_CLAC
+       cld
+       /* ALLOC_PT_GPREGS_ON_STACK -RBP -- must be done by caller */
+       SAVE_C_REGS -RBP+8      /* +8: our return address is at 0(%rsp) */
+       /* Store %rbp to 8(%rsp) for unwinder, not for saving it per se */
+       SAVE_EXTRA_REGS_RBP -RBP+8
+
+       /* Caller expects us to return pointer to pt_regs in %rdi */
+       leaq    -RBP+8(%rsp), %rdi
+
+       testb   $3, CS-RBP+8(%rsp)
+       jz      1f              /* (saved CS & 3) == 0: skip SWAPGS */
+       SWAPGS
+1:
+       /*
+        * Optionally switch to interrupt stack. Push previous stack pointer.
+        * irq_count is used to check if a CPU is already on an interrupt stack
+        * or not. While this is essentially redundant with preempt_count,
+        * it is a little cheaper to use a separate per-CPU counter (short of
+        * moving irq_enter into assembly, which would be too much work)
+        */
+       popq    %rax    /* get return address */
+       movq    %rsp, %rsi      /* %rsi = %rsp before any stack switch */
+       incl    PER_CPU_VAR(irq_count)
+       cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp  /* only if incl gave 0 */
+       CFI_DEF_CFA_REGISTER rsi
+
+       pushq   %rsi            /* old %rsp, left on stack for the caller */
+       pushq   %rax            /* return address back on top for "ret" */
+       ret
+       CFI_ENDPROC
+END(interrupt_entry)
+.macro save_regs_and_call_intr_handler func
+       ALLOC_PT_GPREGS_ON_STACK -RBP   /* interrupt_entry needs this done */
+       call    interrupt_entry /* ret: %rdi -> pt_regs, 0(%rsp) = old %rsp */
+       /*
+        * For debugger:
+        * "CFA (Current Frame Address) is the value on stack + offset"
+        */
+       CFI_ESCAPE      0x0f /* DW_CFA_def_cfa_expression */, 6, \
+                       0x77 /* DW_OP_breg7 (rsp) */, 0, \
+                       0x06 /* DW_OP_deref */, \
+                       0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
+                       0x22 /* DW_OP_plus */
+       /* We entered an interrupt context - irqs are off: */
+       TRACE_IRQS_OFF
+       call    \func   /* expects %rdi -> pt_regs (set by interrupt_entry) */
+.endm
+
 /*
  * Build the entry stubs and pointer table with some assembler magic.
  * We pack 7 stubs into a single 32-byte chunk, which will fit in a
@@ -685,69 +750,17 @@ END(interrupt)
 /*
  * Interrupt entry/exit.
  *
- * Interrupt entry points save only callee clobbered registers in fast path.
- *
  * Entry runs with interrupts off.
+ *
+ * The interrupt stubs pushed (~vector+0x80) onto the stack and
+ * then jumped to common_interrupt.
+ * Interrupt entry points save only callee clobbered registers in fast path.
  */
-
-/* 0(%rsp): ~(interrupt number) */
-       .macro interrupt func
-       cld
-       /*
-        * Since nothing in interrupt handling code touches r12...r15 members
-        * of "struct pt_regs", and since interrupts can nest, we can save
-        * four stack slots and simultaneously provide
-        * an unwind-friendly stack layout by saving "truncated" pt_regs
-        * exactly up to rbp slot, without these members.
-        */
-       ALLOC_PT_GPREGS_ON_STACK -RBP
-       SAVE_C_REGS -RBP
-       /* this goes to 0(%rsp) for unwinder, not for saving the value: */
-       SAVE_EXTRA_REGS_RBP -RBP
-
-       leaq -RBP(%rsp),%rdi    /* arg1 for \func (pointer to pt_regs) */
-
-       testl $3, CS-RBP(%rsp)
-       je 1f
-       SWAPGS
-1:
-       /*
-        * Save previous stack pointer, optionally switch to interrupt stack.
-        * irq_count is used to check if a CPU is already on an interrupt stack
-        * or not. While this is essentially redundant with preempt_count it is
-        * a little cheaper to use a separate counter in the PDA (short of
-        * moving irq_enter into assembly, which would be too much work)
-        */
-       movq %rsp, %rsi
-       incl PER_CPU_VAR(irq_count)
-       cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
-       CFI_DEF_CFA_REGISTER    rsi
-       pushq %rsi
-       /*
-        * For debugger:
-        * "CFA (Current Frame Address) is the value on stack + offset"
-        */
-       CFI_ESCAPE      0x0f /* DW_CFA_def_cfa_expression */, 6, \
-                       0x77 /* DW_OP_breg7 (rsp) */, 0, \
-                       0x06 /* DW_OP_deref */, \
-                       0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
-                       0x22 /* DW_OP_plus */
-       /* We entered an interrupt context - irqs are off: */
-       TRACE_IRQS_OFF
-
-       call \func
-       .endm
-
-       /*
-        * The interrupt stubs push (~vector+0x80) onto the stack and
-        * then jump to common_interrupt.
-        */
        .align ALIGN_common_interrupt
 common_interrupt:
        XCPT_FRAME
-       ASM_CLAC
        addq $-0x80,(%rsp)              /* Adjust vector to [-256,-1] range */
-       interrupt do_IRQ
+       save_regs_and_call_intr_handler do_IRQ
        /* 0(%rsp): old RSP */
 ret_from_intr:
        DISABLE_INTERRUPTS(CLBR_NONE)
@@ -902,10 +915,9 @@ END(common_interrupt)
 .macro apicinterrupt3 num sym do_sym
 ENTRY(\sym)
        INTR_FRAME
-       ASM_CLAC
        pushq_cfi $~(\num)
 .Lcommon_\sym:
-       interrupt \do_sym
+       save_regs_and_call_intr_handler \do_sym
        jmp ret_from_intr
        CFI_ENDPROC
 END(\sym)
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to