This removes the hybrid asm-and-C implementation of exit work.

This patch modifies a giant hack.  vm86 used to fiddle with
TIF_NOTIFY_RESUME and fix itself up in the exit asm.  The hack was
messy and completely incorrect: it broke vm86 if the syscall slow
path was being used.

Rework the hack.  We now forcibly exit vm86 mode on return to
userspace if we're delivering a signal (this is needed to deliver
the signal correctly) or if a new TIF_EXIT_VM86 flag is set.  The
TIF_NOTIFY_RESUME hack is changed to use TIF_EXIT_VM86 instead.

This makes prepare_exit_to_usermode a bit slower on CONFIG_VM86=y
kernels.  People shouldn't use such kernels if they care about
sanity, security, or performance.

Brian Gerst is planning to further rework vm86 mode to leave pt_regs
where it belongs.  That will allow us to revert the
pt_regs_to_thread_info slowdown and the stack switching parts of this
code; instead we can just exit normally, as vm86 won't have a
special stack layout any more.

Before this change, the entry_from_vm86 test failed under strace.
Now it passes.

Signed-off-by: Andy Lutomirski <[email protected]>
---
 arch/x86/entry/common.c            | 56 ++++++++++++++++++++++++++-
 arch/x86/entry/entry_32.S          | 79 ++++++--------------------------------
 arch/x86/include/asm/thread_info.h |  2 +
 arch/x86/kernel/vm86_32.c          |  6 +--
 4 files changed, 69 insertions(+), 74 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index febc53086a69..aeaf7d64be0f 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -240,10 +240,51 @@ void syscall_trace_leave(struct pt_regs *regs)
 
 static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
 {
+#ifdef CONFIG_VM86
+       /*
+        * In VM86 mode, pt_regs isn't in a well-defined place on the
+        * stack.  Skip the optimization entirely.
+        */
+       return current_thread_info();
+#else
        unsigned long top_of_stack =
                (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
        return (struct thread_info *)(top_of_stack - THREAD_SIZE);
+#endif
+}
+
+#ifdef CONFIG_VM86
+static void __noreturn exit_vm86_immediately(struct pt_regs *regs)
+{
+       /*
+        * VM86 sometimes needs to exit back to normal user mode
+        * (unsurprisingly) and its hack of resetting the stack and
+        * jumping into the exit asm isn't always usable (also
+        * unsurprisingly).  Instead, we land in this abomination.
+        *
+        * While I can't defend this code as being anything other
+        * than awful, at least it's more or less self-contained,
+        * and it's less awful and much less buggy than the even
+        * worse hack it replaces.  --Andy
+        */
+       struct pt_regs *regs32;
+
+       clear_tsk_thread_flag(current, TIF_EXIT_VM86);
+       regs32 = save_v86_state((struct kernel_vm86_regs *)regs);
+       local_irq_disable();
+       __asm__ __volatile__(
+               "movl %0,%%esp\n\t"
+               "movl %1,%%ebp\n\t"
+               "jmp resume_userspace"
+               : : "r" (regs32), "r" (current_thread_info()));
+
+       /*
+        * We don't get here.  Instead we restart
+        * prepare_exit_to_usermode via resume_userspace.
+        */
+       unreachable();
 }
+#endif
 
 /* Called with IRQs disabled. */
 __visible void prepare_exit_to_usermode(struct pt_regs *regs)
@@ -264,12 +305,18 @@ __visible void prepare_exit_to_usermode(struct pt_regs 
*regs)
                        READ_ONCE(pt_regs_to_thread_info(regs)->flags);
 
                if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |
-                                     _TIF_UPROBE | _TIF_NEED_RESCHED)))
+                                     _TIF_UPROBE | _TIF_NEED_RESCHED |
+                                     _TIF_EXIT_VM86)))
                        break;
 
                /* We have work to do. */
                local_irq_enable();
 
+#ifdef CONFIG_VM86
+               if (cached_flags & _TIF_EXIT_VM86)
+                       exit_vm86_immediately(regs);
+#endif
+
                if (cached_flags & _TIF_NEED_RESCHED)
                        schedule();
 
@@ -277,8 +324,13 @@ __visible void prepare_exit_to_usermode(struct pt_regs 
*regs)
                        uprobe_notify_resume(regs);
 
                /* deal with pending signal delivery */
-               if (cached_flags & _TIF_SIGPENDING)
+               if (cached_flags & _TIF_SIGPENDING) {
+#ifdef CONFIG_VM86
+                       if (v8086_mode(regs))
+                               exit_vm86_immediately(regs);
+#endif
                        do_signal(regs);
+               }
 
                if (cached_flags & _TIF_NOTIFY_RESUME) {
                        clear_thread_flag(TIF_NOTIFY_RESUME);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 66ff9c4055d7..b2909bf8cf70 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -256,14 +256,10 @@ ret_from_intr:
 
 ENTRY(resume_userspace)
        LOCKDEP_SYS_EXIT
-       DISABLE_INTERRUPTS(CLBR_ANY)            # make sure we don't miss an 
interrupt
-                                               # setting need_resched or 
sigpending
-                                               # between sampling and the iret
+       DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_OFF
-       movl    TI_flags(%ebp), %ecx
-       andl    $_TIF_WORK_MASK, %ecx           # is there any work to be done 
on
-                                               # int/exception return?
-       jne     work_pending
+       movl    %esp, %eax
+       call    prepare_exit_to_usermode
        jmp     restore_all
 END(ret_from_exception)
 
@@ -341,7 +337,7 @@ sysenter_after_call:
        TRACE_IRQS_OFF
        movl    TI_flags(%ebp), %ecx
        testl   $_TIF_ALLWORK_MASK, %ecx
-       jnz     syscall_exit_work
+       jnz     syscall_exit_work_irqs_off
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
        movl    PT_EIP(%esp), %edx
@@ -377,13 +373,7 @@ syscall_after_call:
        movl    %eax, PT_EAX(%esp)              # store the return value
 syscall_exit:
        LOCKDEP_SYS_EXIT
-       DISABLE_INTERRUPTS(CLBR_ANY)            # make sure we don't miss an 
interrupt
-                                               # setting need_resched or 
sigpending
-                                               # between sampling and the iret
-       TRACE_IRQS_OFF
-       movl    TI_flags(%ebp), %ecx
-       testl   $_TIF_ALLWORK_MASK, %ecx        # current->work
-       jnz     syscall_exit_work
+       jmp     syscall_exit_work
 
 restore_all:
        TRACE_IRQS_IRET
@@ -460,52 +450,6 @@ ldt_ss:
 #endif
 ENDPROC(entry_INT80_32)
 
-       # perform work that needs to be done immediately before resumption
-       ALIGN
-work_pending:
-       testb   $_TIF_NEED_RESCHED, %cl
-       jz      work_notifysig
-work_resched:
-       call    schedule
-       LOCKDEP_SYS_EXIT
-       DISABLE_INTERRUPTS(CLBR_ANY)            # make sure we don't miss an 
interrupt
-                                               # setting need_resched or 
sigpending
-                                               # between sampling and the iret
-       TRACE_IRQS_OFF
-       movl    TI_flags(%ebp), %ecx
-       andl    $_TIF_WORK_MASK, %ecx           # is there any work to be done 
other
-                                               # than syscall tracing?
-       jz      restore_all
-       testb   $_TIF_NEED_RESCHED, %cl
-       jnz     work_resched
-
-work_notifysig:                                        # deal with pending 
signals and
-                                               # notify-resume requests
-#ifdef CONFIG_VM86
-       testl   $X86_EFLAGS_VM, PT_EFLAGS(%esp)
-       movl    %esp, %eax
-       jnz     work_notifysig_v86              # special case for v86
-1:
-#else
-       movl    %esp, %eax
-#endif
-       TRACE_IRQS_ON
-       ENABLE_INTERRUPTS(CLBR_NONE)
-       xorl    %edx, %edx
-       call    do_notify_resume
-       jmp     resume_userspace
-
-#ifdef CONFIG_VM86
-       ALIGN
-work_notifysig_v86:
-       pushl   %ecx                            # save ti_flags for 
do_notify_resume
-       call    save_v86_state                  # %eax contains pt_regs pointer
-       popl    %ecx
-       movl    %eax, %esp
-       jmp     1b
-#endif
-END(work_pending)
-
        # perform syscall exit tracing
        ALIGN
 syscall_trace_entry:
@@ -520,15 +464,14 @@ END(syscall_trace_entry)
 
        # perform syscall exit tracing
        ALIGN
-syscall_exit_work:
-       testl   $_TIF_WORK_SYSCALL_EXIT, %ecx
-       jz      work_pending
+syscall_exit_work_irqs_off:
        TRACE_IRQS_ON
-       ENABLE_INTERRUPTS(CLBR_ANY)             # could let 
syscall_trace_leave() call
-                                               # schedule() instead
+       ENABLE_INTERRUPTS(CLBR_ANY)
+
+syscall_exit_work:
        movl    %esp, %eax
-       call    syscall_trace_leave
-       jmp     resume_userspace
+       call    syscall_return_slowpath
+       jmp     restore_all
 END(syscall_exit_work)
 
 syscall_fault:
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 225ee545e1a0..5a60392ce70e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -95,6 +95,7 @@ struct thread_info {
 #define TIF_SYSCALL_EMU                6       /* syscall emulation active */
 #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
 #define TIF_SECCOMP            8       /* secure computing */
+#define TIF_EXIT_VM86          9       /* deferred vm86 exit */
 #define TIF_USER_RETURN_NOTIFY 11      /* notify kernel of userspace return */
 #define TIF_UPROBE             12      /* breakpointed or singlestepping */
 #define TIF_NOTSC              16      /* TSC is not accessible in userland */
@@ -119,6 +120,7 @@ struct thread_info {
 #define _TIF_SYSCALL_EMU       (1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
+#define _TIF_EXIT_VM86         (1 << TIF_EXIT_VM86)
 #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
 #define _TIF_NOTSC             (1 << TIF_NOTSC)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index fc9db6ef2a95..46dcef7046b6 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -549,11 +549,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long 
error_code, int trapno)
 {
        if (VMPI.is_vm86pus) {
                if ((trapno == 3) || (trapno == 1)) {
+                       /* Queue up a return to normal userspace. */
                        KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
-                       /* setting this flag forces the code in entry_32.S to
-                          the path where we call save_v86_state() and change
-                          the stack pointer to KVM86->regs32 */
-                       set_thread_flag(TIF_NOTIFY_RESUME);
+                       set_thread_flag(TIF_EXIT_VM86);
                        return 0;
                }
                do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 
4), SP(regs));
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to