This moves the espfix64 logic into native_iret.  To make this work,
it gets rid of the native patch for INTERRUPT_RETURN:
INTERRUPT_RETURN on native kernels is now 'jmp native_iret'.

This changes the 16-bit SS behavior on Xen from OOPSing to leaking
some bits of the Xen hypervisor's RSP (I think).

Signed-off-by: Andy Lutomirski <[email protected]>
---

I just pushed this to kernel.org, so the kbuild robot should be pounding
on it soon.  It passes all my tests on bare metal, and it fails them
without crashing on Xen.

It's not exactly beautiful, but I think it's an improvement.

 arch/x86/include/asm/irqflags.h     |  2 +-
 arch/x86/kernel/entry_64.S          | 28 ++++++++++------------------
 arch/x86/kernel/paravirt_patch_64.c |  2 --
 3 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index bba3cf8..0a8b519 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -129,7 +129,7 @@ static inline notrace unsigned long 
arch_local_irq_save(void)
 
 #define PARAVIRT_ADJUST_EXCEPTION_FRAME        /*  */
 
-#define INTERRUPT_RETURN       iretq
+#define INTERRUPT_RETURN       jmp native_iret
 #define USERGS_SYSRET64                                \
        swapgs;                                 \
        sysretq;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b25ca96..c844f08 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -830,27 +830,24 @@ restore_args:
        RESTORE_ARGS 1,8,1
 
 irq_return:
+       INTERRUPT_RETURN
+
+ENTRY(native_iret)
        /*
         * Are we returning to a stack segment from the LDT?  Note: in
         * 64-bit mode SS:RSP on the exception stack is always valid.
         */
 #ifdef CONFIG_X86_ESPFIX64
        testb $4,(SS-RIP)(%rsp)
-       jnz irq_return_ldt
+       jnz native_irq_return_ldt
 #endif
 
-irq_return_iret:
-       INTERRUPT_RETURN
-       _ASM_EXTABLE(irq_return_iret, bad_iret)
-
-#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
+native_irq_return_iret:
        iretq
-       _ASM_EXTABLE(native_iret, bad_iret)
-#endif
+       _ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
-irq_return_ldt:
+native_irq_return_ldt:
        pushq_cfi %rax
        pushq_cfi %rdi
        SWAPGS
@@ -872,7 +869,7 @@ irq_return_ldt:
        SWAPGS
        movq %rax,%rsp
        popq_cfi %rax
-       jmp irq_return_iret
+       jmp native_irq_return_iret
 #endif
 
        .section .fixup,"ax"
@@ -956,13 +953,8 @@ __do_double_fault:
        cmpl $__KERNEL_CS,CS(%rdi)
        jne do_double_fault
        movq RIP(%rdi),%rax
-       cmpq $irq_return_iret,%rax
-#ifdef CONFIG_PARAVIRT
-       je 1f
-       cmpq $native_iret,%rax
-#endif
+       cmpq $native_irq_return_iret,%rax
        jne do_double_fault             /* This shouldn't happen... */
-1:
        movq PER_CPU_VAR(kernel_stack),%rax
        subq $(6*8-KERNEL_STACK_OFFSET),%rax    /* Reset to original stack */
        movq %rax,RSP(%rdi)
@@ -1428,7 +1420,7 @@ error_sti:
  */
 error_kernelspace:
        incl %ebx
-       leaq irq_return_iret(%rip),%rcx
+       leaq native_irq_return_iret(%rip),%rcx
        cmpq %rcx,RIP+8(%rsp)
        je error_swapgs
        movl %ecx,%eax  /* zero extend */
diff --git a/arch/x86/kernel/paravirt_patch_64.c 
b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34..a1da673 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
 DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(pv_cpu_ops, iret, "iretq");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_irq_ops, save_fl);
                PATCH_SITE(pv_irq_ops, irq_enable);
                PATCH_SITE(pv_irq_ops, irq_disable);
-               PATCH_SITE(pv_cpu_ops, iret);
                PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
                PATCH_SITE(pv_cpu_ops, usergs_sysret32);
                PATCH_SITE(pv_cpu_ops, usergs_sysret64);
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to