GCC is unable to properly optimize functions that have a very short
likely case and a longer and register-heavier cold part -- it fails
to sink all of the register saving and stack frame setup code into
the unlikely part.

Help it out with syscall_return_slowpath by splitting it into two
parts and inline the hot part.

Saves 6 cycles for compat syscalls.

Signed-off-by: Andy Lutomirski <[email protected]>
---
 arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index d6b8f4fd8274..3d64a739e607 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -284,15 +284,40 @@ __visible inline void prepare_exit_to_usermode(struct 
pt_regs *regs)
        user_enter();
 }
 
+#define SYSCALL_EXIT_WORK_FLAGS                                \
+       (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |      \
+        _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+
+static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
+{
+       bool step;
+
+       audit_syscall_exit(regs);
+
+       if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
+               trace_sys_exit(regs, regs->ax);
+
+       /*
+        * If TIF_SYSCALL_EMU is set, we only get here because of
+        * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
+        * We already reported this syscall instruction in
+        * syscall_trace_enter().
+        */
+       step = unlikely(
+               (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
+               == _TIF_SINGLESTEP);
+       if (step || cached_flags & _TIF_SYSCALL_TRACE)
+               tracehook_report_syscall_exit(regs, step);
+}
+
 /*
  * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
  * state such that we can immediately switch to user mode.
  */
-__visible void syscall_return_slowpath(struct pt_regs *regs)
+__visible inline void syscall_return_slowpath(struct pt_regs *regs)
 {
        struct thread_info *ti = pt_regs_to_thread_info(regs);
        u32 cached_flags = READ_ONCE(ti->flags);
-       bool step;
 
        CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
 
@@ -304,25 +329,8 @@ __visible void syscall_return_slowpath(struct pt_regs 
*regs)
         * First do one-time work.  If these work items are enabled, we
         * want to run them exactly once per syscall exit with IRQs on.
         */
-       if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT |
-                           _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) {
-               audit_syscall_exit(regs);
-
-               if (cached_flags & _TIF_SYSCALL_TRACEPOINT)
-                       trace_sys_exit(regs, regs->ax);
-
-               /*
-                * If TIF_SYSCALL_EMU is set, we only get here because of
-                * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
-                * We already reported this syscall instruction in
-                * syscall_trace_enter().
-                */
-               step = unlikely(
-                       (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU))
-                       == _TIF_SINGLESTEP);
-               if (step || cached_flags & _TIF_SYSCALL_TRACE)
-                       tracehook_report_syscall_exit(regs, step);
-       }
+       if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS))
+               syscall_slow_exit_work(regs, cached_flags);
 
 #ifdef CONFIG_COMPAT
        /*
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to