Instead of using a duplicate syscall table for the fast path, create
stubs for the syscalls that need pt_regs.  Each stub dispatches based
on the call site it was reached from.

I think that this is very likely to introduce a mis-predicted branch
in all such syscalls.  I think that's fine -- all of them are
already very slow.

Heavily based on a patch from Brian Gerst [1].

[1] http://lkml.kernel.org/g/1449666173-15366-1-git-send-email-brge...@gmail.com

Signed-off-by: Brian Gerst <brge...@gmail.com>
Cc: the arch/x86 maintainers <x...@kernel.org>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Frédéric Weisbecker <fweis...@gmail.com>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Signed-off-by: Andy Lutomirski <l...@kernel.org>
---

Brian, here's a counter-proposal.  It's derived from your patch, but it works
differently.

If people like this, I'll send a new version of the whole series that includes
it at the end.

 arch/x86/entry/entry_64.S   | 49 ++++++++++++++++++++++++++++++++++++++-------
 arch/x86/entry/syscall_64.c | 25 +++++------------------
 2 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ab5362f241d..16779b52419e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -188,7 +188,15 @@ entry_SYSCALL_64_fastpath:
 #endif
        ja      1f                              /* return -ENOSYS (already in pt_regs->ax) */
        movq    %r10, %rcx
-       call    *sys_call_table_fastpath_64(, %rax, 8)
+
+       /*
+        * This call instruction is handled specially in stub_ptregs_64.
+        * It might end up jumping to the slow path.  If it jumps, rax and
+        * r11 are clobbered.
+        */
+       call    *sys_call_table(, %rax, 8)
+.Lentry_SYSCALL_64_after_fastpath_call:
+
        movq    %rax, RAX(%rsp)
 1:
 
@@ -306,15 +314,42 @@ END(entry_SYSCALL_64)
 
 ENTRY(stub_ptregs_64)
        /*
-        * Syscalls marked as needing ptregs that go through the fast path
-        * land here.  We transfer to the slow path.
+        * Syscalls marked as needing ptregs land here.
+        * If we are on the fast path, we need to save the extra regs.
+        * If we are on the slow path, the extra regs are already saved.
+        *
+        * RAX stores a pointer to the C function implementing the syscall.
+        *
+        * We can safely clobber RAX (clobbered by return value regardless)
+        * and R11 (owned by callee and never stores an argument) regardless
+        * of which path we take.
         */
-       DISABLE_INTERRUPTS(CLBR_NONE)
-       TRACE_IRQS_OFF
-       addq    $8, %rsp
-       jmp     entry_SYSCALL64_slow_path
+       leaq    .Lentry_SYSCALL_64_after_fastpath_call(%rip), %r11
+       cmpq    %r11, (%rsp)
+       jne     1f
+
+       /* Called from fast path -- pop return address and jump to slow path */
+       popq    %rax
+       jmp     entry_SYSCALL64_slow_path       /* called from fast path */
+
+1:
+       /* Called from C */
+       jmp     *%rax                           /* called from C */
 END(stub_ptregs_64)
 
+.macro ptregs_stub func
+ENTRY(ptregs_\func)
+       leaq    \func(%rip), %rax
+       jmp     stub_ptregs_64
+END(ptregs_\func)
+.endm
+
+/* Instantiate ptregs_stub for each ptregs-using syscall */
+#define __SYSCALL_64_QUAL_(sym)
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
+#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
+#include <asm/syscalls_64.h>
+
 /*
  * A newly forked process directly context switches into this address.
  *
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 601745c667ce..9dbc5abb6162 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -6,11 +6,14 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_64_QUAL_(sym) sym
+#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
+
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
 
-#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
 
 extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
@@ -22,21 +25,3 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
        [0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
-
-#undef __SYSCALL_64
-
-extern long stub_ptregs_64(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
-#define __SYSCALL_64_QUAL_(nr, sym) [nr] = sym,
-#define __SYSCALL_64_QUAL_ptregs(nr, sym) [nr] = stub_ptregs_64,
-
-#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(nr, sym)
-
-asmlinkage const sys_call_ptr_t sys_call_table_fastpath_64[__NR_syscall_max+1] = {
-       /*
-        * Smells like a compiler bug -- it doesn't work
-        * when the & below is removed.
-        */
-       [0 ... __NR_syscall_max] = &sys_ni_syscall,
-#include <asm/syscalls_64.h>
-};
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to