Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/entry/common.c                  |  15 +++-
 arch/x86/entry/entry_32.S                | 132 ++++++++-----------------------
 arch/x86/entry/vdso/vdso32/system_call.S |   2 +
 3 files changed, 50 insertions(+), 99 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 9182c69f860b..96bf0e79159e 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -413,7 +413,20 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
                regs->ip == landing_pad &&
                (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
 #else
-       return 0;
+       /*
+        * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
+        *
+        * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
+        * because the ECX fixup above will ensure that this is essentially
+        * never the case.
+        *
+        * We don't allow syscalls at all from vm86 mode, but we still
+        * need to check VM, because we might be returning from sys_vm86.
+        */
+       return static_cpu_has(X86_FEATURE_SEP) &&
+               regs->cs == __USER_CS && regs->ss == __USER_DS &&
+               regs->ip == landing_pad &&
+               (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
 #endif
 }
 #endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 02881e528945..c1c7c6364216 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -287,76 +287,47 @@ need_resched:
 END(resume_kernel)
 #endif
 
-/*
- * SYSENTER_RETURN points to after the SYSENTER instruction
- * in the vsyscall page.  See vsyscall-sysentry.S, which defines
- * the symbol.
- */
-
        # SYSENTER  call handler stub
 ENTRY(entry_SYSENTER_32)
        movl    TSS_sysenter_sp0(%esp), %esp
 sysenter_past_esp:
+       pushl   $__USER_DS              /* pt_regs->ss */
+       pushl   %ecx                    /* pt_regs->cx */
+       pushfl                          /* pt_regs->flags (except IF = 0) */
+       orl     $X86_EFLAGS_IF, (%esp)  /* Fix IF */
+       pushl   $__USER_CS              /* pt_regs->cs */
+       pushl   $0                      /* pt_regs->ip = 0 (placeholder) */
+       pushl   %eax                    /* pt_regs->orig_ax */
+       SAVE_ALL pt_regs_ax=$-ENOSYS    /* save rest */
+
        /*
-        * Interrupts are disabled here, but we can't trace it until
-        * enough kernel state to call TRACE_IRQS_OFF can be called - but
-        * we immediately enable interrupts at that point anyway.
-        */
-       pushl   $__USER_DS
-       pushl   %ebp
-       pushfl
-       orl     $X86_EFLAGS_IF, (%esp)
-       pushl   $__USER_CS
-       /*
-        * Push current_thread_info()->sysenter_return to the stack.
-        * A tiny bit of offset fixup is necessary: TI_sysenter_return
-        * is relative to thread_info, which is at the bottom of the
-        * kernel stack page.  4*4 means the 4 words pushed above;
-        * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
-        * and THREAD_SIZE takes us to the bottom.
+        * User mode is traced as though IRQs are on, and SYSENTER
+        * turned them off.
         */
-       pushl   ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
-
-       pushl   %eax
-       SAVE_ALL
-       ENABLE_INTERRUPTS(CLBR_NONE)
-
-/*
- * Load the potential sixth argument from user stack.
- * Careful about security.
- */
-       cmpl    $__PAGE_OFFSET-3, %ebp
-       jae     syscall_fault
-       ASM_STAC
-1:     movl    (%ebp), %ebp
-       ASM_CLAC
-       movl    %ebp, PT_EBP(%esp)
-       _ASM_EXTABLE(1b, syscall_fault)
-
-       GET_THREAD_INFO(%ebp)
-
-       testl   $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
-       jnz     syscall_trace_entry
-sysenter_do_call:
-       cmpl    $(NR_syscalls), %eax
-       jae     sysenter_badsys
-       call    *sys_call_table(, %eax, 4)
-sysenter_after_call:
-       movl    %eax, PT_EAX(%esp)
-       LOCKDEP_SYS_EXIT
-       DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_OFF
-       movl    TI_flags(%ebp), %ecx
-       testl   $_TIF_ALLWORK_MASK, %ecx
-       jnz     syscall_exit_work_irqs_off
-sysenter_exit:
-/* if something modifies registers it must also disable sysexit */
-       movl    PT_EIP(%esp), %edx
-       movl    PT_OLDESP(%esp), %ecx
-       xorl    %ebp, %ebp
-       TRACE_IRQS_ON
+
+       movl    %esp, %eax
+       call    do_fast_syscall_32
+       testl   %eax, %eax
+       jz      .Lsyscall_32_done
+
+/* Opportunistic SYSEXIT */
+       TRACE_IRQS_ON                   /* User mode traces as IRQs on. */
+       movl    PT_EIP(%esp), %edx      /* pt_regs->ip */
+       movl    PT_OLDESP(%esp), %ecx   /* pt_regs->sp */
+       popl    %ebx                    /* pt_regs->bx */
+       addl    $2*4, %esp              /* skip pt_regs->cx and pt_regs->dx */
+       popl    %esi                    /* pt_regs->si */
+       popl    %edi                    /* pt_regs->di */
+       popl    %ebp                    /* pt_regs->bp */
+       popl    %eax                    /* pt_regs->ax */
 1:     mov     PT_FS(%esp), %fs
        PTGS_TO_GS
+
+       /*
+        * Return back to the vDSO, which will pop ecx and edx.
+        * Don't bother with DS and ES (they already contain __USER_DS).
+        */
        ENABLE_INTERRUPTS_SYSEXIT
 
 .pushsection .fixup, "ax"
@@ -371,7 +342,7 @@ ENDPROC(entry_SYSENTER_32)
 ENTRY(entry_INT80_32)
        ASM_CLAC
        pushl   %eax                    /* pt_regs->orig_ax */
-       SAVE_ALL pt_regs_ax=$-ENOSYS    /* save rest, load -ENOSYS into ax */
+       SAVE_ALL pt_regs_ax=$-ENOSYS    /* save rest */
 
        /*
         * User mode is traced as though IRQs are on, and the interrupt gate
@@ -381,6 +352,7 @@ ENTRY(entry_INT80_32)
 
        movl    %esp, %eax
        call    do_int80_syscall_32
+.Lsyscall_32_done:
 
 restore_all:
        TRACE_IRQS_IRET
@@ -457,42 +429,6 @@ ldt_ss:
 #endif
 ENDPROC(entry_INT80_32)
 
-       # perform syscall exit tracing
-       ALIGN
-syscall_trace_entry:
-       movl    $-ENOSYS, PT_EAX(%esp)
-       movl    %esp, %eax
-       call    syscall_trace_enter
-       /* What it returned is what we'll actually use.  */
-       cmpl    $(NR_syscalls), %eax
-       jnae    syscall_call
-       jmp     syscall_exit
-END(syscall_trace_entry)
-
-       # perform syscall exit tracing
-       ALIGN
-syscall_exit_work_irqs_off:
-       TRACE_IRQS_ON
-       ENABLE_INTERRUPTS(CLBR_ANY)
-
-syscall_exit_work:
-       movl    %esp, %eax
-       call    syscall_return_slowpath
-       jmp     restore_all
-END(syscall_exit_work)
-
-syscall_fault:
-       ASM_CLAC
-       GET_THREAD_INFO(%ebp)
-       movl    $-EFAULT, PT_EAX(%esp)
-       jmp     resume_userspace
-END(syscall_fault)
-
-sysenter_badsys:
-       movl    $-ENOSYS, %eax
-       jmp     sysenter_after_call
-END(sysenter_badsys)
-
 .macro FIXUP_ESPFIX_STACK
 /*
  * Switch back for ESPFIX stack to the normal zerobased stack
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 00157cae71e0..93bd8452383f 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -34,6 +34,8 @@ __kernel_vsyscall:
        /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
        ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
                          "syscall",  X86_FEATURE_SYSCALL32
+#else
+       ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
 #endif
 
        /* Enter using int $0x80 */
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to