Author: gordon
Date: Wed Mar 14 04:00:00 2018
New Revision: 330908
URL: https://svnweb.freebsd.org/changeset/base/330908

Log:
  Add mitigations for two classes of speculative execution vulnerabilities
  on amd64. [FreeBSD-SA-18:03.speculative_execution]
  
  Approved by:  so
  Security:     FreeBSD-SA-18:03.speculative_execution
  Security:     CVE-2017-5715
  Security:     CVE-2017-5754

Modified:
  releng/11.1/UPDATING
  releng/11.1/sys/amd64/amd64/apic_vector.S
  releng/11.1/sys/amd64/amd64/atpic_vector.S
  releng/11.1/sys/amd64/amd64/cpu_switch.S
  releng/11.1/sys/amd64/amd64/db_trace.c
  releng/11.1/sys/amd64/amd64/exception.S
  releng/11.1/sys/amd64/amd64/genassym.c
  releng/11.1/sys/amd64/amd64/initcpu.c
  releng/11.1/sys/amd64/amd64/machdep.c
  releng/11.1/sys/amd64/amd64/mp_machdep.c
  releng/11.1/sys/amd64/amd64/pmap.c
  releng/11.1/sys/amd64/amd64/support.S
  releng/11.1/sys/amd64/amd64/sys_machdep.c
  releng/11.1/sys/amd64/amd64/trap.c
  releng/11.1/sys/amd64/amd64/vm_machdep.c
  releng/11.1/sys/amd64/ia32/ia32_exception.S
  releng/11.1/sys/amd64/ia32/ia32_syscall.c
  releng/11.1/sys/amd64/include/asmacros.h
  releng/11.1/sys/amd64/include/frame.h
  releng/11.1/sys/amd64/include/intr_machdep.h
  releng/11.1/sys/amd64/include/md_var.h
  releng/11.1/sys/amd64/include/pcb.h
  releng/11.1/sys/amd64/include/pcpu.h
  releng/11.1/sys/amd64/include/pmap.h
  releng/11.1/sys/amd64/include/smp.h
  releng/11.1/sys/amd64/vmm/intel/vmx.c
  releng/11.1/sys/amd64/vmm/vmm.c
  releng/11.1/sys/conf/Makefile.amd64
  releng/11.1/sys/conf/newvers.sh
  releng/11.1/sys/dev/cpuctl/cpuctl.c
  releng/11.1/sys/dev/hyperv/vmbus/amd64/vmbus_vector.S
  releng/11.1/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
  releng/11.1/sys/dev/hyperv/vmbus/vmbus.c
  releng/11.1/sys/i386/i386/apic_vector.s
  releng/11.1/sys/i386/i386/atpic_vector.s
  releng/11.1/sys/i386/i386/exception.s
  releng/11.1/sys/i386/i386/machdep.c
  releng/11.1/sys/i386/i386/pmap.c
  releng/11.1/sys/i386/i386/support.s
  releng/11.1/sys/i386/i386/vm_machdep.c
  releng/11.1/sys/sys/cpuctl.h
  releng/11.1/sys/x86/include/apicvar.h
  releng/11.1/sys/x86/include/specialreg.h
  releng/11.1/sys/x86/include/x86_smp.h
  releng/11.1/sys/x86/include/x86_var.h
  releng/11.1/sys/x86/isa/atpic.c
  releng/11.1/sys/x86/x86/cpu_machdep.c
  releng/11.1/sys/x86/x86/identcpu.c
  releng/11.1/sys/x86/x86/local_apic.c
  releng/11.1/sys/x86/x86/mp_x86.c
  releng/11.1/sys/x86/xen/pv.c
  releng/11.1/usr.sbin/cpucontrol/cpucontrol.8
  releng/11.1/usr.sbin/cpucontrol/cpucontrol.c

Modified: releng/11.1/UPDATING
==============================================================================
--- releng/11.1/UPDATING        Wed Mar 14 03:50:12 2018        (r330907)
+++ releng/11.1/UPDATING        Wed Mar 14 04:00:00 2018        (r330908)
@@ -16,6 +16,11 @@ from older versions of FreeBSD, try WITHOUT_CLANG and 
 the tip of head, and then rebuild without this option. The bootstrap process
 from older version of current across the gcc/clang cutover is a bit fragile.
 
+20180314       p8      FreeBSD-SA-18:03.speculative_execution
+
+       Add mitigations for two classes of speculative execution vulnerabilities
+       on amd64.
+
 20180307       p7      FreeBSD-SA-18:01.ipsec
                        FreeBSD-SA-18:02.ntp
                        FreeBSD-EN-18:01.tzdata

Modified: releng/11.1/sys/amd64/amd64/apic_vector.S
==============================================================================
--- releng/11.1/sys/amd64/amd64/apic_vector.S   Wed Mar 14 03:50:12 2018        (r330907)
+++ releng/11.1/sys/amd64/amd64/apic_vector.S   Wed Mar 14 04:00:00 2018        (r330908)
@@ -2,7 +2,13 @@
  * Copyright (c) 1989, 1990 William F. Jolitz.
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
+ * Copyright (c) 2014-2018 The FreeBSD Foundation
+ * All rights reserved.
  *
+ * Portions of this software were developed by
+ * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
+ * the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -38,12 +44,12 @@
 
 #include "opt_smp.h"
 
+#include "assym.s"
+
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
 #include <x86/apicreg.h>
 
-#include "assym.s"
-
 #ifdef SMP
 #define LK     lock ;
 #else
@@ -73,30 +79,28 @@ as_lapic_eoi:
  * translates that into a vector, and passes the vector to the
  * lapic_handle_intr() function.
  */
-#define        ISR_VEC(index, vec_name)                                        \
-       .text ;                                                         \
-       SUPERALIGN_TEXT ;                                               \
-IDTVEC(vec_name) ;                                                     \
-       PUSH_FRAME ;                                                    \
-       FAKE_MCOUNT(TF_RIP(%rsp)) ;                                     \
-       cmpl    $0,x2apic_mode ;                                        \
-       je      1f ;                                                    \
-       movl    $(MSR_APIC_ISR0 + index),%ecx ;                         \
-       rdmsr ;                                                         \
-       jmp     2f ;                                                    \
-1: ;                                                                   \
-       movq    lapic_map, %rdx ;       /* pointer to local APIC */     \
-       movl    LA_ISR + 16 * (index)(%rdx), %eax ;     /* load ISR */  \
-2: ;                                                                   \
-       bsrl    %eax, %eax ;    /* index of highest set bit in ISR */   \
-       jz      3f ;                                                    \
-       addl    $(32 * index),%eax ;                                    \
-       movq    %rsp, %rsi      ;                                       \
-       movl    %eax, %edi ;    /* pass the IRQ */                      \
-       call    lapic_handle_intr ;                                     \
-3: ;                                                                   \
-       MEXITCOUNT ;                                                    \
+       .macro  ISR_VEC index, vec_name
+       INTR_HANDLER    \vec_name
+       FAKE_MCOUNT(TF_RIP(%rsp))
+       cmpl    $0,x2apic_mode
+       je      1f
+       movl    $(MSR_APIC_ISR0 + \index),%ecx
+       rdmsr
+       jmp     2f
+1:
+       movq    lapic_map, %rdx         /* pointer to local APIC */
+       movl    LA_ISR + 16 * (\index)(%rdx), %eax      /* load ISR */
+2:
+       bsrl    %eax, %eax      /* index of highest set bit in ISR */
+       jz      3f
+       addl    $(32 * \index),%eax
+       movq    %rsp, %rsi
+       movl    %eax, %edi      /* pass the IRQ */
+       call    lapic_handle_intr
+3:
+       MEXITCOUNT
        jmp     doreti
+       .endm
 
 /*
  * Handle "spurious INTerrupts".
@@ -108,26 +112,21 @@ IDTVEC(vec_name) ;                                             \
        .text
        SUPERALIGN_TEXT
 IDTVEC(spuriousint)
-
        /* No EOI cycle used here */
-
        jmp     doreti_iret
 
-       ISR_VEC(1, apic_isr1)
-       ISR_VEC(2, apic_isr2)
-       ISR_VEC(3, apic_isr3)
-       ISR_VEC(4, apic_isr4)
-       ISR_VEC(5, apic_isr5)
-       ISR_VEC(6, apic_isr6)
-       ISR_VEC(7, apic_isr7)
+       ISR_VEC 1, apic_isr1
+       ISR_VEC 2, apic_isr2
+       ISR_VEC 3, apic_isr3
+       ISR_VEC 4, apic_isr4
+       ISR_VEC 5, apic_isr5
+       ISR_VEC 6, apic_isr6
+       ISR_VEC 7, apic_isr7
 
 /*
  * Local APIC periodic timer handler.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(timerint)
-       PUSH_FRAME
+       INTR_HANDLER    timerint
        FAKE_MCOUNT(TF_RIP(%rsp))
        movq    %rsp, %rdi
        call    lapic_handle_timer
@@ -137,10 +136,7 @@ IDTVEC(timerint)
 /*
  * Local APIC CMCI handler.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(cmcint)
-       PUSH_FRAME
+       INTR_HANDLER cmcint
        FAKE_MCOUNT(TF_RIP(%rsp))
        call    lapic_handle_cmc
        MEXITCOUNT
@@ -149,10 +145,7 @@ IDTVEC(cmcint)
 /*
  * Local APIC error interrupt handler.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(errorint)
-       PUSH_FRAME
+       INTR_HANDLER errorint
        FAKE_MCOUNT(TF_RIP(%rsp))
        call    lapic_handle_error
        MEXITCOUNT
@@ -163,10 +156,7 @@ IDTVEC(errorint)
  * Xen event channel upcall interrupt handler.
  * Only used when the hypervisor supports direct vector callbacks.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(xen_intr_upcall)
-       PUSH_FRAME
+       INTR_HANDLER xen_intr_upcall
        FAKE_MCOUNT(TF_RIP(%rsp))
        movq    %rsp, %rdi
        call    xen_intr_handle_upcall
@@ -183,74 +173,68 @@ IDTVEC(xen_intr_upcall)
        SUPERALIGN_TEXT
 invltlb_ret:
        call    as_lapic_eoi
-       POP_FRAME
-       jmp     doreti_iret
+       jmp     ld_regs
 
        SUPERALIGN_TEXT
-IDTVEC(invltlb)
-       PUSH_FRAME
-
+       INTR_HANDLER invltlb
        call    invltlb_handler
        jmp     invltlb_ret
 
-IDTVEC(invltlb_pcid)
-       PUSH_FRAME
-
+       INTR_HANDLER invltlb_pcid
        call    invltlb_pcid_handler
        jmp     invltlb_ret
 
-IDTVEC(invltlb_invpcid)
-       PUSH_FRAME
-
+       INTR_HANDLER invltlb_invpcid_nopti
        call    invltlb_invpcid_handler
        jmp     invltlb_ret
 
+       INTR_HANDLER invltlb_invpcid_pti
+       call    invltlb_invpcid_pti_handler
+       jmp     invltlb_ret
+
 /*
  * Single page TLB shootdown
  */
-       .text
+       INTR_HANDLER invlpg
+       call    invlpg_handler
+       jmp     invltlb_ret
 
-       SUPERALIGN_TEXT
-IDTVEC(invlpg)
-       PUSH_FRAME
+       INTR_HANDLER invlpg_invpcid
+       call    invlpg_invpcid_handler
+       jmp     invltlb_ret
 
-       call    invlpg_handler
+       INTR_HANDLER invlpg_pcid
+       call    invlpg_pcid_handler
        jmp     invltlb_ret
 
 /*
  * Page range TLB shootdown.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(invlrng)
-       PUSH_FRAME
-
+       INTR_HANDLER invlrng
        call    invlrng_handler
        jmp     invltlb_ret
 
+       INTR_HANDLER invlrng_invpcid
+       call    invlrng_invpcid_handler
+       jmp     invltlb_ret
+
+       INTR_HANDLER invlrng_pcid
+       call    invlrng_pcid_handler
+       jmp     invltlb_ret
+
 /*
  * Invalidate cache.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(invlcache)
-       PUSH_FRAME
-
+       INTR_HANDLER invlcache
        call    invlcache_handler
        jmp     invltlb_ret
 
 /*
  * Handler for IPIs sent via the per-cpu IPI bitmap.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(ipi_intr_bitmap_handler)                
-       PUSH_FRAME
-
+       INTR_HANDLER ipi_intr_bitmap_handler
        call    as_lapic_eoi
-       
        FAKE_MCOUNT(TF_RIP(%rsp))
-
        call    ipi_bitmap_handler
        MEXITCOUNT
        jmp     doreti
@@ -258,24 +242,15 @@ IDTVEC(ipi_intr_bitmap_handler)           
 /*
  * Executed by a CPU when it receives an IPI_STOP from another CPU.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(cpustop)
-       PUSH_FRAME
-
+       INTR_HANDLER cpustop
        call    as_lapic_eoi
-
        call    cpustop_handler
        jmp     doreti
 
 /*
  * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(cpususpend)
-       PUSH_FRAME
-
+       INTR_HANDLER cpususpend
        call    cpususpend_handler
        call    as_lapic_eoi
        jmp     doreti
@@ -285,10 +260,7 @@ IDTVEC(cpususpend)
  *
  * - Calls the generic rendezvous action function.
  */
-       .text
-       SUPERALIGN_TEXT
-IDTVEC(rendezvous)
-       PUSH_FRAME
+       INTR_HANDLER rendezvous
 #ifdef COUNT_IPIS
        movl    PCPU(CPUID), %eax
        movq    ipi_rendezvous_counts(,%rax,8), %rax
@@ -327,5 +299,9 @@ IDTVEC(justreturn)
        popq    %rcx
        popq    %rax
        jmp     doreti_iret
+
+       INTR_HANDLER    justreturn1
+       call    as_lapic_eoi
+       jmp     doreti
 
 #endif /* SMP */

Modified: releng/11.1/sys/amd64/amd64/atpic_vector.S
==============================================================================
--- releng/11.1/sys/amd64/amd64/atpic_vector.S  Wed Mar 14 03:50:12 2018        (r330907)
+++ releng/11.1/sys/amd64/amd64/atpic_vector.S  Wed Mar 14 04:00:00 2018        (r330908)
@@ -36,38 +36,35 @@
  * master and slave interrupt controllers.
  */
 
+#include "assym.s"
 #include <machine/asmacros.h>
 
-#include "assym.s"
-
 /*
  * Macros for interrupt entry, call to handler, and exit.
  */
-#define        INTR(irq_num, vec_name) \
-       .text ;                                                         \
-       SUPERALIGN_TEXT ;                                               \
-IDTVEC(vec_name) ;                                                     \
-       PUSH_FRAME ;                                                    \
-       FAKE_MCOUNT(TF_RIP(%rsp)) ;                                     \
-       movq    %rsp, %rsi      ;                                       \
-       movl    $irq_num, %edi;         /* pass the IRQ */              \
-       call    atpic_handle_intr ;                                     \
-       MEXITCOUNT ;                                                    \
+       .macro  INTR    irq_num, vec_name
+       INTR_HANDLER    \vec_name
+       FAKE_MCOUNT(TF_RIP(%rsp))
+       movq    %rsp, %rsi
+       movl    $\irq_num, %edi         /* pass the IRQ */
+       call    atpic_handle_intr
+       MEXITCOUNT
        jmp     doreti
+       .endm
 
-       INTR(0, atpic_intr0)
-       INTR(1, atpic_intr1)
-       INTR(2, atpic_intr2)
-       INTR(3, atpic_intr3)
-       INTR(4, atpic_intr4)
-       INTR(5, atpic_intr5)
-       INTR(6, atpic_intr6)
-       INTR(7, atpic_intr7)
-       INTR(8, atpic_intr8)
-       INTR(9, atpic_intr9)
-       INTR(10, atpic_intr10)
-       INTR(11, atpic_intr11)
-       INTR(12, atpic_intr12)
-       INTR(13, atpic_intr13)
-       INTR(14, atpic_intr14)
-       INTR(15, atpic_intr15)
+       INTR    0, atpic_intr0
+       INTR    1, atpic_intr1
+       INTR    2, atpic_intr2
+       INTR    3, atpic_intr3
+       INTR    4, atpic_intr4
+       INTR    5, atpic_intr5
+       INTR    6, atpic_intr6
+       INTR    7, atpic_intr7
+       INTR    8, atpic_intr8
+       INTR    9, atpic_intr9
+       INTR    10, atpic_intr10
+       INTR    11, atpic_intr11
+       INTR    12, atpic_intr12
+       INTR    13, atpic_intr13
+       INTR    14, atpic_intr14
+       INTR    15, atpic_intr15

Modified: releng/11.1/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- releng/11.1/sys/amd64/amd64/cpu_switch.S    Wed Mar 14 03:50:12 2018        (r330907)
+++ releng/11.1/sys/amd64/amd64/cpu_switch.S    Wed Mar 14 04:00:00 2018        (r330908)
@@ -191,9 +191,11 @@ do_kthread:
 done_tss:
        movq    %r8,PCPU(RSP0)
        movq    %r8,PCPU(CURPCB)
-       /* Update the TSS_RSP0 pointer for the next interrupt */
+       /* Update the COMMON_TSS_RSP0 pointer for the next interrupt */
+       cmpb    $0,pti(%rip)
+       jne     1f
        movq    %r8,COMMON_TSS_RSP0(%rdx)
-       movq    %r12,PCPU(CURTHREAD)            /* into next thread */
+1:     movq    %r12,PCPU(CURTHREAD)            /* into next thread */
 
        /* Test if debug registers should be restored. */
        testl   $PCB_DBREGS,PCB_FLAGS(%r8)
@@ -270,7 +272,12 @@ do_tss:    movq    %rdx,PCPU(TSSP)
        shrq    $8,%rcx
        movl    %ecx,8(%rax)
        movb    $0x89,5(%rax)   /* unset busy */
-       movl    $TSSSEL,%eax
+       cmpb    $0,pti(%rip)
+       je      1f
+       movq    PCPU(PRVSPACE),%rax
+       addq    $PC_PTI_STACK+PC_PTI_STACK_SZ*8,%rax
+       movq    %rax,COMMON_TSS_RSP0(%rdx)
+1:     movl    $TSSSEL,%eax
        ltr     %ax
        jmp     done_tss
 

Modified: releng/11.1/sys/amd64/amd64/db_trace.c
==============================================================================
--- releng/11.1/sys/amd64/amd64/db_trace.c      Wed Mar 14 03:50:12 2018        (r330907)
+++ releng/11.1/sys/amd64/amd64/db_trace.c      Wed Mar 14 04:00:00 2018        (r330908)
@@ -200,6 +200,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, s
        if (name != NULL) {
                if (strcmp(name, "calltrap") == 0 ||
                    strcmp(name, "fork_trampoline") == 0 ||
+                   strcmp(name, "mchk_calltrap") == 0 ||
                    strcmp(name, "nmi_calltrap") == 0 ||
                    strcmp(name, "Xdblfault") == 0)
                        frame_type = TRAP;

Modified: releng/11.1/sys/amd64/amd64/exception.S
==============================================================================
--- releng/11.1/sys/amd64/amd64/exception.S     Wed Mar 14 03:50:12 2018        (r330907)
+++ releng/11.1/sys/amd64/amd64/exception.S     Wed Mar 14 04:00:00 2018        (r330908)
@@ -1,12 +1,16 @@
 /*-
  * Copyright (c) 1989, 1990 William F. Jolitz.
  * Copyright (c) 1990 The Regents of the University of California.
- * Copyright (c) 2007 The FreeBSD Foundation
+ * Copyright (c) 2007-2018 The FreeBSD Foundation
  * All rights reserved.
  *
  * Portions of this software were developed by A. Joseph Koshy under
  * sponsorship from the FreeBSD Foundation and Google, Inc.
  *
+ * Portions of this software were developed by
+ * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
+ * the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -38,13 +42,13 @@
 #include "opt_compat.h"
 #include "opt_hwpmc_hooks.h"
 
+#include "assym.s"
+
 #include <machine/asmacros.h>
 #include <machine/psl.h>
 #include <machine/trap.h>
 #include <machine/specialreg.h>
 
-#include "assym.s"
-
 #ifdef KDTRACE_HOOKS
        .bss
        .globl  dtrace_invop_jump_addr
@@ -100,69 +104,62 @@ dtrace_invop_calltrap_addr:
 MCOUNT_LABEL(user)
 MCOUNT_LABEL(btrap)
 
-/* Traps that we leave interrupts disabled for.. */
-#define        TRAP_NOEN(a)    \
-       subq $TF_RIP,%rsp; \
-       movl $(a),TF_TRAPNO(%rsp) ; \
-       movq $0,TF_ADDR(%rsp) ; \
-       movq $0,TF_ERR(%rsp) ; \
+/* Traps that we leave interrupts disabled for. */
+       .macro  TRAP_NOEN       l, trapno
+       PTI_ENTRY       \l,X\l
+       .globl  X\l
+       .type   X\l,@function
+X\l:   subq $TF_RIP,%rsp
+       movl $\trapno,TF_TRAPNO(%rsp)
+       movq $0,TF_ADDR(%rsp)
+       movq $0,TF_ERR(%rsp)
        jmp alltraps_noen
-IDTVEC(dbg)
-       TRAP_NOEN(T_TRCTRAP)
-IDTVEC(bpt)
-       TRAP_NOEN(T_BPTFLT)
+       .endm
+
+       TRAP_NOEN       dbg, T_TRCTRAP
+       TRAP_NOEN       bpt, T_BPTFLT
 #ifdef KDTRACE_HOOKS
-IDTVEC(dtrace_ret)
-       TRAP_NOEN(T_DTRACE_RET)
+       TRAP_NOEN       dtrace_ret, T_DTRACE_RET
 #endif
 
 /* Regular traps; The cpu does not supply tf_err for these. */
-#define        TRAP(a)  \
-       subq $TF_RIP,%rsp; \
-       movl $(a),TF_TRAPNO(%rsp) ; \
-       movq $0,TF_ADDR(%rsp) ; \
-       movq $0,TF_ERR(%rsp) ; \
+       .macro  TRAP    l, trapno
+       PTI_ENTRY       \l,X\l
+       .globl  X\l
+       .type   X\l,@function
+X\l:
+       subq $TF_RIP,%rsp
+       movl $\trapno,TF_TRAPNO(%rsp)
+       movq $0,TF_ADDR(%rsp)
+       movq $0,TF_ERR(%rsp)
        jmp alltraps
-IDTVEC(div)
-       TRAP(T_DIVIDE)
-IDTVEC(ofl)
-       TRAP(T_OFLOW)
-IDTVEC(bnd)
-       TRAP(T_BOUND)
-IDTVEC(ill)
-       TRAP(T_PRIVINFLT)
-IDTVEC(dna)
-       TRAP(T_DNA)
-IDTVEC(fpusegm)
-       TRAP(T_FPOPFLT)
-IDTVEC(mchk)
-       TRAP(T_MCHK)
-IDTVEC(rsvd)
-       TRAP(T_RESERVED)
-IDTVEC(fpu)
-       TRAP(T_ARITHTRAP)
-IDTVEC(xmm)
-       TRAP(T_XMMFLT)
+       .endm
 
-/* This group of traps have tf_err already pushed by the cpu */
-#define        TRAP_ERR(a)     \
-       subq $TF_ERR,%rsp; \
-       movl $(a),TF_TRAPNO(%rsp) ; \
-       movq $0,TF_ADDR(%rsp) ; \
+       TRAP    div, T_DIVIDE
+       TRAP    ofl, T_OFLOW
+       TRAP    bnd, T_BOUND
+       TRAP    ill, T_PRIVINFLT
+       TRAP    dna, T_DNA
+       TRAP    fpusegm, T_FPOPFLT
+       TRAP    rsvd, T_RESERVED
+       TRAP    fpu, T_ARITHTRAP
+       TRAP    xmm, T_XMMFLT
+
+/* This group of traps have tf_err already pushed by the cpu. */
+       .macro  TRAP_ERR        l, trapno
+       PTI_ENTRY       \l,X\l,has_err=1
+       .globl  X\l
+       .type   X\l,@function
+X\l:
+       subq $TF_ERR,%rsp
+       movl $\trapno,TF_TRAPNO(%rsp)
+       movq $0,TF_ADDR(%rsp)
        jmp alltraps
-IDTVEC(tss)
-       TRAP_ERR(T_TSSFLT)
-IDTVEC(missing)
-       subq    $TF_ERR,%rsp
-       movl    $T_SEGNPFLT,TF_TRAPNO(%rsp)
-       jmp     prot_addrf
-IDTVEC(stk)
-       subq    $TF_ERR,%rsp
-       movl    $T_STKFLT,TF_TRAPNO(%rsp)
-       jmp     prot_addrf
-IDTVEC(align)
-       TRAP_ERR(T_ALIGNFLT)
+       .endm
 
+       TRAP_ERR        tss, T_TSSFLT
+       TRAP_ERR        align, T_ALIGNFLT
+
        /*
         * alltraps entry point.  Use swapgs if this is the first time in the
         * kernel from userland.  Reenable interrupts if they were enabled
@@ -174,25 +171,24 @@ IDTVEC(align)
 alltraps:
        movq    %rdi,TF_RDI(%rsp)
        testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
-       jz      alltraps_testi          /* already running with kernel GS.base */
+       jz      1f              /* already running with kernel GS.base */
        swapgs
        movq    PCPU(CURPCB),%rdi
        andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-       movw    %fs,TF_FS(%rsp)
-       movw    %gs,TF_GS(%rsp)
-       movw    %es,TF_ES(%rsp)
-       movw    %ds,TF_DS(%rsp)
-alltraps_testi:
-       testl   $PSL_I,TF_RFLAGS(%rsp)
-       jz      alltraps_pushregs_no_rdi
-       sti
-alltraps_pushregs_no_rdi:
-       movq    %rsi,TF_RSI(%rsp)
+1:     SAVE_SEGS
        movq    %rdx,TF_RDX(%rsp)
+       movq    %rax,TF_RAX(%rsp)
        movq    %rcx,TF_RCX(%rsp)
+       testb   $SEL_RPL_MASK,TF_CS(%rsp)
+       jz      2f
+       call    handle_ibrs_entry
+2:     testl   $PSL_I,TF_RFLAGS(%rsp)
+       jz      alltraps_pushregs_no_rax
+       sti
+alltraps_pushregs_no_rax:
+       movq    %rsi,TF_RSI(%rsp)
        movq    %r8,TF_R8(%rsp)
        movq    %r9,TF_R9(%rsp)
-       movq    %rax,TF_RAX(%rsp)
        movq    %rbx,TF_RBX(%rsp)
        movq    %rbp,TF_RBP(%rsp)
        movq    %r10,TF_R10(%rsp)
@@ -248,15 +244,18 @@ calltrap:
 alltraps_noen:
        movq    %rdi,TF_RDI(%rsp)
        testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
-       jz      1f      /* already running with kernel GS.base */
+       jz      1f /* already running with kernel GS.base */
        swapgs
        movq    PCPU(CURPCB),%rdi
        andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-1:     movw    %fs,TF_FS(%rsp)
-       movw    %gs,TF_GS(%rsp)
-       movw    %es,TF_ES(%rsp)
-       movw    %ds,TF_DS(%rsp)
-       jmp     alltraps_pushregs_no_rdi
+1:     SAVE_SEGS
+       movq    %rdx,TF_RDX(%rsp)
+       movq    %rax,TF_RAX(%rsp)
+       movq    %rcx,TF_RCX(%rsp)
+       testb   $SEL_RPL_MASK,TF_CS(%rsp)
+       jz      alltraps_pushregs_no_rax
+       call    handle_ibrs_entry
+       jmp     alltraps_pushregs_no_rax
 
 IDTVEC(dblfault)
        subq    $TF_ERR,%rsp
@@ -278,71 +277,132 @@ IDTVEC(dblfault)
        movq    %r13,TF_R13(%rsp)
        movq    %r14,TF_R14(%rsp)
        movq    %r15,TF_R15(%rsp)
-       movw    %fs,TF_FS(%rsp)
-       movw    %gs,TF_GS(%rsp)
-       movw    %es,TF_ES(%rsp)
-       movw    %ds,TF_DS(%rsp)
+       SAVE_SEGS
        movl    $TF_HASSEGS,TF_FLAGS(%rsp)
        cld
        testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
        jz      1f                      /* already running with kernel GS.base */
        swapgs
 1:
-       movq    %rsp,%rdi
+       movq    PCPU(KCR3),%rax
+       cmpq    $~0,%rax
+       je      2f
+       movq    %rax,%cr3
+2:     movq    %rsp,%rdi
        call    dblfault_handler
-2:
-       hlt
-       jmp     2b
+3:     hlt
+       jmp     3b
 
+       ALIGN_TEXT
+IDTVEC(page_pti)
+       testb   $SEL_RPL_MASK,PTI_CS-2*8(%rsp)
+       jz      Xpage
+       swapgs
+       pushq   %rax
+       pushq   %rdx
+       movq    %cr3,%rax
+       movq    %rax,PCPU(SAVED_UCR3)
+       PTI_UUENTRY has_err=1
+       subq    $TF_ERR,%rsp
+       movq    %rdi,TF_RDI(%rsp)
+       movq    %rax,TF_RAX(%rsp)
+       movq    %rdx,TF_RDX(%rsp)
+       movq    %rcx,TF_RCX(%rsp)
+       jmp     page_u
 IDTVEC(page)
        subq    $TF_ERR,%rsp
-       movl    $T_PAGEFLT,TF_TRAPNO(%rsp)
-       movq    %rdi,TF_RDI(%rsp)       /* free up a GP register */
+       movq    %rdi,TF_RDI(%rsp)       /* free up GP registers */
+       movq    %rax,TF_RAX(%rsp)
+       movq    %rdx,TF_RDX(%rsp)
+       movq    %rcx,TF_RCX(%rsp)
        testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
-       jz      1f                      /* already running with kernel GS.base */
+       jz      page_cr2                /* already running with kernel GS.base */
        swapgs
-       movq    PCPU(CURPCB),%rdi
+page_u:        movq    PCPU(CURPCB),%rdi
        andl    $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-1:     movq    %cr2,%rdi               /* preserve %cr2 before ..  */
+       movq    PCPU(SAVED_UCR3),%rax
+       movq    %rax,PCB_SAVED_UCR3(%rdi)
+       call    handle_ibrs_entry
+page_cr2:
+       movq    %cr2,%rdi               /* preserve %cr2 before ..  */
        movq    %rdi,TF_ADDR(%rsp)      /* enabling interrupts. */
-       movw    %fs,TF_FS(%rsp)
-       movw    %gs,TF_GS(%rsp)
-       movw    %es,TF_ES(%rsp)
-       movw    %ds,TF_DS(%rsp)
+       SAVE_SEGS
+       movl    $T_PAGEFLT,TF_TRAPNO(%rsp)
        testl   $PSL_I,TF_RFLAGS(%rsp)
-       jz      alltraps_pushregs_no_rdi
+       jz      alltraps_pushregs_no_rax
        sti
-       jmp     alltraps_pushregs_no_rdi
+       jmp     alltraps_pushregs_no_rax
 
        /*
         * We have to special-case this one.  If we get a trap in doreti() at
         * the iretq stage, we'll reenter with the wrong gs state.  We'll have
         * to do a special the swapgs in this case even coming from the kernel.
         * XXX linux has a trap handler for their equivalent of load_gs().
+        *
+        * On the stack, we have the hardware interrupt frame to return
+        * to usermode (faulted) and another frame with error code, for
+        * fault.  For PTI, copy both frames to the main thread stack.
         */
-IDTVEC(prot)
+       .macro PROTF_ENTRY name,trapno
+\name\()_pti_doreti:
+       pushq   %rax
+       pushq   %rdx
+       swapgs
+       movq    PCPU(KCR3),%rax
+       movq    %rax,%cr3
+       movq    PCPU(RSP0),%rax
+       subq    $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */
+       MOVE_STACKS     (PTI_SIZE / 4 - 3)
+       movq    %rax,%rsp
+       popq    %rdx
+       popq    %rax
+       swapgs
+       jmp     X\name
+IDTVEC(\name\()_pti)
+       cmpq    $doreti_iret,PTI_RIP-2*8(%rsp)
+       je      \name\()_pti_doreti
+       testb   $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
+       jz      X\name
+       PTI_UENTRY has_err=1
+       swapgs
+IDTVEC(\name)
        subq    $TF_ERR,%rsp
-       movl    $T_PROTFLT,TF_TRAPNO(%rsp)
+       movl    $\trapno,TF_TRAPNO(%rsp)
+       jmp     prot_addrf
+       .endm
+
+       PROTF_ENTRY     missing, T_SEGNPFLT
+       PROTF_ENTRY     stk, T_STKFLT
+       PROTF_ENTRY     prot, T_PROTFLT
+
 prot_addrf:
        movq    $0,TF_ADDR(%rsp)
        movq    %rdi,TF_RDI(%rsp)       /* free up a GP register */
+       movq    %rax,TF_RAX(%rsp)
+       movq    %rdx,TF_RDX(%rsp)
+       movq    %rcx,TF_RCX(%rsp)
+       movw    %fs,TF_FS(%rsp)
+       movw    %gs,TF_GS(%rsp)
        leaq    doreti_iret(%rip),%rdi
        cmpq    %rdi,TF_RIP(%rsp)
-       je      1f                      /* kernel but with user gsbase!! */
+       je      5f                      /* kernel but with user gsbase!! */
        testb   $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
-       jz      2f                      /* already running with kernel GS.base */
-1:     swapgs
-2:     movq    PCPU(CURPCB),%rdi
+       jz      6f                      /* already running with kernel GS.base */
+       swapgs
+       movq    PCPU(CURPCB),%rdi
+4:     call    handle_ibrs_entry
        orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)  /* always full iret from GPF */
-       movw    %fs,TF_FS(%rsp)
-       movw    %gs,TF_GS(%rsp)
        movw    %es,TF_ES(%rsp)
        movw    %ds,TF_DS(%rsp)
        testl   $PSL_I,TF_RFLAGS(%rsp)
-       jz      alltraps_pushregs_no_rdi
+       jz      alltraps_pushregs_no_rax
        sti
-       jmp     alltraps_pushregs_no_rdi
+       jmp     alltraps_pushregs_no_rax
 
+5:     swapgs
+6:     movq    PCPU(CURPCB),%rdi
+       jmp     4b
+
 /*
  * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
  * and the new privilige level.  We are still running on the old user stack
@@ -352,8 +412,18 @@ prot_addrf:
  * We do not support invoking this from a custom %cs or %ss (e.g. using
  * entries from an LDT).
  */
+       SUPERALIGN_TEXT
+IDTVEC(fast_syscall_pti)
+       swapgs
+       movq    %rax,PCPU(SCRATCH_RAX)
+       movq    PCPU(KCR3),%rax
+       movq    %rax,%cr3
+       jmp     fast_syscall_common
+       SUPERALIGN_TEXT
 IDTVEC(fast_syscall)
        swapgs
+       movq    %rax,PCPU(SCRATCH_RAX)
+fast_syscall_common:
        movq    %rsp,PCPU(SCRATCH_RSP)
        movq    PCPU(RSP0),%rsp
        /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
@@ -363,10 +433,11 @@ IDTVEC(fast_syscall)
        movq    %rcx,TF_RIP(%rsp)       /* %rcx original value is in %r10 */
        movq    PCPU(SCRATCH_RSP),%r11  /* %r11 already saved */
        movq    %r11,TF_RSP(%rsp)       /* user stack pointer */
-       movw    %fs,TF_FS(%rsp)
-       movw    %gs,TF_GS(%rsp)
-       movw    %es,TF_ES(%rsp)
-       movw    %ds,TF_DS(%rsp)
+       movq    PCPU(SCRATCH_RAX),%rax
+       movq    %rax,TF_RAX(%rsp)       /* syscall number */
+       movq    %rdx,TF_RDX(%rsp)       /* arg 3 */
+       SAVE_SEGS
+       call    handle_ibrs_entry
        movq    PCPU(CURPCB),%r11
        andl    $~PCB_FULL_IRET,PCB_FLAGS(%r11)
        sti
@@ -375,11 +446,9 @@ IDTVEC(fast_syscall)
        movq    $2,TF_ERR(%rsp)
        movq    %rdi,TF_RDI(%rsp)       /* arg 1 */
        movq    %rsi,TF_RSI(%rsp)       /* arg 2 */
-       movq    %rdx,TF_RDX(%rsp)       /* arg 3 */
        movq    %r10,TF_RCX(%rsp)       /* arg 4 */
        movq    %r8,TF_R8(%rsp)         /* arg 5 */
        movq    %r9,TF_R9(%rsp)         /* arg 6 */
-       movq    %rax,TF_RAX(%rsp)       /* syscall number */
        movq    %rbx,TF_RBX(%rsp)       /* C preserved */
        movq    %rbp,TF_RBP(%rsp)       /* C preserved */
        movq    %r12,TF_R12(%rsp)       /* C preserved */
@@ -398,11 +467,12 @@ IDTVEC(fast_syscall)
        /* Disable interrupts before testing PCB_FULL_IRET. */
        cli
        testl   $PCB_FULL_IRET,PCB_FLAGS(%rax)
-       jnz     3f
+       jnz     4f
        /* Check for and handle AST's on return to userland. */
        movq    PCPU(CURTHREAD),%rax
        testl   $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
-       jne     2f
+       jne     3f
+       call    handle_ibrs_exit
        /* Restore preserved registers. */
        MEXITCOUNT
        movq    TF_RDI(%rsp),%rdi       /* bonus; preserve arg 1 */
@@ -412,16 +482,21 @@ IDTVEC(fast_syscall)
        movq    TF_RFLAGS(%rsp),%r11    /* original %rflags */
        movq    TF_RIP(%rsp),%rcx       /* original %rip */
        movq    TF_RSP(%rsp),%rsp       /* user stack pointer */
-       swapgs
+       cmpb    $0,pti
+       je      2f
+       movq    PCPU(UCR3),%r9
+       movq    %r9,%cr3
+       xorl    %r9d,%r9d
+2:     swapgs
        sysretq
 
-2:     /* AST scheduled. */
+3:     /* AST scheduled. */
        sti
        movq    %rsp,%rdi
        call    ast
        jmp     1b
 
-3:     /* Requested full context restore, use doreti for that. */
+4:     /* Requested full context restore, use doreti for that. */
        MEXITCOUNT
        jmp     doreti
 
@@ -477,17 +552,15 @@ IDTVEC(nmi)
        movq    %r13,TF_R13(%rsp)
        movq    %r14,TF_R14(%rsp)
        movq    %r15,TF_R15(%rsp)
-       movw    %fs,TF_FS(%rsp)
-       movw    %gs,TF_GS(%rsp)
-       movw    %es,TF_ES(%rsp)
-       movw    %ds,TF_DS(%rsp)
+       SAVE_SEGS
        movl    $TF_HASSEGS,TF_FLAGS(%rsp)
        cld
        xorl    %ebx,%ebx
        testb   $SEL_RPL_MASK,TF_CS(%rsp)
        jnz     nmi_fromuserspace
        /*
-        * We've interrupted the kernel.  Preserve GS.base in %r12.
+        * We've interrupted the kernel.  Preserve GS.base in %r12,
+        * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTRL in %r14d.
         */
        movl    $MSR_GSBASE,%ecx
        rdmsr
@@ -499,10 +572,32 @@ IDTVEC(nmi)
        movl    %edx,%eax
        shrq    $32,%rdx
        wrmsr
+       movq    %cr3,%r13
+       movq    PCPU(KCR3),%rax
+       cmpq    $~0,%rax
+       je      1f
+       movq    %rax,%cr3
+1:     testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+       je      nmi_calltrap
+       movl    $MSR_IA32_SPEC_CTRL,%ecx
+       rdmsr
+       movl    %eax,%r14d
+       call    handle_ibrs_entry
        jmp     nmi_calltrap
 nmi_fromuserspace:
        incl    %ebx
        swapgs
+       movq    %cr3,%r13
+       movq    PCPU(KCR3),%rax
+       cmpq    $~0,%rax
+       je      1f
+       movq    %rax,%cr3
+1:     call    handle_ibrs_entry
+       movq    PCPU(CURPCB),%rdi
+       testq   %rdi,%rdi
+       jz      3f
+       orl     $PCB_FULL_IRET,PCB_FLAGS(%rdi)
+3:
 /* Note: this label is also used by ddb and gdb: */
 nmi_calltrap:
        FAKE_MCOUNT(TF_RIP(%rsp))
@@ -525,26 +620,29 @@ nmi_calltrap:
        movq    PCPU(CURTHREAD),%rax
        orq     %rax,%rax       /* curthread present? */
        jz      nocallchain
-       testl   $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
-       jz      nocallchain
        /*
-        * A user callchain is to be captured, so:
-        * - Move execution to the regular kernel stack, to allow for
-        *   nested NMI interrupts.
-        * - Take the processor out of "NMI" mode by faking an "iret".
-        * - Enable interrupts, so that copyin() can work.
+        * Move execution to the regular kernel stack, because we
+        * committed to return through doreti.
         */
        movq    %rsp,%rsi       /* source stack pointer */
        movq    $TF_SIZE,%rcx
        movq    PCPU(RSP0),%rdx
        subq    %rcx,%rdx
        movq    %rdx,%rdi       /* destination stack pointer */
-
        shrq    $3,%rcx         /* trap frame size in long words */
        cld
        rep
        movsq                   /* copy trapframe */
+       movq    %rdx,%rsp       /* we are on the regular kstack */
 
+       testl   $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
+       jz      nocallchain
+       /*
+        * A user callchain is to be captured, so:
+        * - Take the processor out of "NMI" mode by faking an "iret",
+        *   to allow for nested NMI interrupts.
+        * - Enable interrupts, so that copyin() can work.
+        */
        movl    %ss,%eax
        pushq   %rax            /* tf_ss */
        pushq   %rdx            /* tf_rsp (on kernel stack) */
@@ -574,33 +672,139 @@ outofnmi:
        cli
 nocallchain:
 #endif
-       testl   %ebx,%ebx
+       testl   %ebx,%ebx       /* %ebx != 0 => return to userland */
        jnz     doreti_exit
-nmi_kernelexit:
        /*
+        * Restore speculation control MSR, if preserved.
+        */
+       testl   $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+       je      1f
+       movl    %r14d,%eax
+       xorl    %edx,%edx
+       movl    $MSR_IA32_SPEC_CTRL,%ecx
+       wrmsr
+       /*
         * Put back the preserved MSR_GSBASE value.

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to