Module Name: src Committed By: maxv Date: Thu Feb 22 08:56:52 UTC 2018
Modified Files: src/sys/arch/amd64/amd64: locore.S src/sys/arch/amd64/include: frameasm.h src/sys/arch/x86/x86: cpu.c patch.c svs.c x86_machdep.c Log Message: Add a dynamic detection for SVS. The SVS_* macros are now compiled as skip-noopt. When the system boots, if the cpu is from Intel, they are hotpatched to their real content. Typically: jmp 1f int3 int3 int3 ... int3 ... 1: gets hotpatched to: movq SVS_UTLS+UTLS_KPDIRPA,%rax movq %rax,%cr3 movq CPUVAR(KRSP0),%rsp These two chunks of code are of the exact same size. We put int3 (0xCC) to make sure we never execute there. In the non-SVS (i.e., non-Intel) case, all it costs is one jump. Given that the SVS_* macros are small, this jump will likely leave us in the same icache line, so it's pretty fast. The syscall entry point is special, because there we use a scratch uint64_t not in curcpu but in the UTLS page, and it's difficult to hotpatch this properly. So instead of hotpatching we declare the entry point as an ASM macro, and define two functions: syscall and syscall_svs, the latter being the one used in the SVS case. While here 'syscall' is optimized not to contain an SVS_ENTER - this way we don't even need to do a jump on the non-SVS case. When adding pages in the user page tables, make sure we don't have PG_G, now that it's dynamic. A read-only sysctl is added, machdep.svs_enabled, that tells whether the kernel uses SVS or not. More changes to come, svs_init() is not very clean. To generate a diff of this commit: cvs rdiff -u -r1.151 -r1.152 src/sys/arch/amd64/amd64/locore.S cvs rdiff -u -r1.34 -r1.35 src/sys/arch/amd64/include/frameasm.h cvs rdiff -u -r1.147 -r1.148 src/sys/arch/x86/x86/cpu.c cvs rdiff -u -r1.31 -r1.32 src/sys/arch/x86/x86/patch.c cvs rdiff -u -r1.3 -r1.4 src/sys/arch/x86/x86/svs.c cvs rdiff -u -r1.103 -r1.104 src/sys/arch/x86/x86/x86_machdep.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/amd64/amd64/locore.S diff -u src/sys/arch/amd64/amd64/locore.S:1.151 src/sys/arch/amd64/amd64/locore.S:1.152 --- src/sys/arch/amd64/amd64/locore.S:1.151 Sun Feb 18 14:07:29 2018 +++ src/sys/arch/amd64/amd64/locore.S Thu Feb 22 08:56:51 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.151 2018/02/18 14:07:29 maxv Exp $ */ +/* $NetBSD: locore.S,v 1.152 2018/02/22 08:56:51 maxv Exp $ */ /* * Copyright-o-rama! @@ -1117,17 +1117,27 @@ ENTRY(cpu_switchto) /* Switch ring0 stack */ #ifdef SVS + movb _C_LABEL(svs_enabled),%al + testb %al,%al + jz .Lno_svs_switch + movq CPUVAR(RSP0),%rax movq CPUVAR(TSS),%rdi movq %rax,TSS_RSP0(%rdi) -#elif !defined(XEN) + jmp .Lring0_switched + +.Lno_svs_switch: +#endif + +#if !defined(XEN) movq PCB_RSP0(%r14),%rax movq CPUVAR(TSS),%rdi movq %rax,TSS_RSP0(%rdi) #else movq %r14,%rdi - callq _C_LABEL(x86_64_switch_context); + callq _C_LABEL(x86_64_switch_context) #endif +.Lring0_switched: /* Don't bother with the rest if switching to a system process. */ testl $LW_SYSTEM,L_FLAG(%r12) @@ -1347,9 +1357,10 @@ END(lwp_trampoline) /* * Entry points of the 'syscall' instruction, 64bit and 32bit mode. */ - TEXT_USER_BEGIN -IDTVEC(syscall) + +.macro SYSCALL_ENTRY name,is_svs +IDTVEC(\name) #ifndef XEN /* * The user %rip is in %rcx and the user %rflags in %r11. 
The kernel %cs @@ -1365,31 +1376,39 @@ IDTVEC(syscall) */ swapgs -#ifdef SVS - movq %rax,SVS_UTLS+UTLS_SCRATCH - movq SVS_UTLS+UTLS_RSP0,%rax -#define SP(x) (x)-(TF_SS+8)(%rax) -#else - movq %r15,CPUVAR(SCRATCH) - movq CPUVAR(CURLWP),%r15 - movq L_PCB(%r15),%r15 - movq PCB_RSP0(%r15),%r15 /* LWP's kernel stack pointer */ -#define SP(x) (x)-(TF_SS+8)(%r15) -#endif +#define SP(x,reg) (x)-(TF_SS+8)(reg) - /* Make stack look like an 'int nn' frame */ - movq $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS) /* user %ss */ - movq %rsp,SP(TF_RSP) /* user %rsp */ - movq %r11,SP(TF_RFLAGS) /* user %rflags */ - movq $(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS) /* user %cs */ - movq %rcx,SP(TF_RIP) /* user %rip */ + .if \is_svs + movq %rax,SVS_UTLS+UTLS_SCRATCH + movq SVS_UTLS+UTLS_RSP0,%rax + + /* Make stack look like an 'int nn' frame */ + movq $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS,%rax) /* user %ss */ + movq %rsp,SP(TF_RSP,%rax) /* user %rsp */ + movq %r11,SP(TF_RFLAGS,%rax) /* user %rflags */ + movq $(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS,%rax) /* user %cs */ + movq %rcx,SP(TF_RIP,%rax) /* user %rip */ + leaq SP(0,%rax),%rsp /* %rsp now valid after frame */ + + movq SVS_UTLS+UTLS_SCRATCH,%rax + .else + movq %r15,CPUVAR(SCRATCH) + movq CPUVAR(CURLWP),%r15 + movq L_PCB(%r15),%r15 + movq PCB_RSP0(%r15),%r15 /* LWP's kernel stack pointer */ + + /* Make stack look like an 'int nn' frame */ + movq $(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS,%r15) /* user %ss */ + movq %rsp,SP(TF_RSP,%r15) /* user %rsp */ + movq %r11,SP(TF_RFLAGS,%r15) /* user %rflags */ + movq $(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS,%r15) /* user %cs */ + movq %rcx,SP(TF_RIP,%r15) /* user %rip */ + leaq SP(0,%r15),%rsp /* %rsp now valid after frame */ - leaq SP(0),%rsp /* %rsp now valid after frame */ -#ifdef SVS - movq SVS_UTLS+UTLS_SCRATCH,%rax -#else - movq CPUVAR(SCRATCH),%r15 -#endif + movq CPUVAR(SCRATCH),%r15 + .endif + +#undef SP movq $2,TF_ERR(%rsp) /* syscall instruction size */ movq $T_ASTFLT,TF_TRAPNO(%rsp) @@ -1406,9 
+1425,18 @@ IDTVEC(syscall) movw $GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp) movw $0,TF_FS(%rsp) movw $0,TF_GS(%rsp) - SVS_ENTER + .if \is_svs + SVS_ENTER + .endif jmp handle_syscall -IDTVEC_END(syscall) +IDTVEC_END(\name) +.endm + +SYSCALL_ENTRY syscall,is_svs=0 + + TEXT_USER_BEGIN + +SYSCALL_ENTRY syscall_svs,is_svs=1 IDTVEC(syscall32) sysret /* go away please */ @@ -1559,3 +1587,47 @@ do_mov_gs: do_iret: iretq END(intrfastexit) + + TEXT_USER_END + +#ifdef SVS + .globl svs_enter + .globl svs_enter_end + .globl svs_enter_altstack + .globl svs_enter_altstack_end + .globl svs_leave + .globl svs_leave_end + .globl svs_leave_altstack + .globl svs_leave_altstack_end + +LABEL(svs_enter) + movq SVS_UTLS+UTLS_KPDIRPA,%rax + movq %rax,%cr3 + movq CPUVAR(KRSP0),%rsp +LABEL(svs_enter_end) + +LABEL(svs_enter_altstack) + testb $SEL_UPL,TF_CS(%rsp) + jz 1234f + movq SVS_UTLS+UTLS_KPDIRPA,%rax + movq %rax,%cr3 +1234: +LABEL(svs_enter_altstack_end) + +LABEL(svs_leave) + testb $SEL_UPL,TF_CS(%rsp) + jz 1234f + movq CPUVAR(URSP0),%rsp + movq CPUVAR(UPDIRPA),%rax + movq %rax,%cr3 +1234: +LABEL(svs_leave_end) + +LABEL(svs_leave_altstack) + testb $SEL_UPL,TF_CS(%rsp) + jz 1234f + movq CPUVAR(UPDIRPA),%rax + movq %rax,%cr3 +1234: +LABEL(svs_leave_altstack_end) +#endif Index: src/sys/arch/amd64/include/frameasm.h diff -u src/sys/arch/amd64/include/frameasm.h:1.34 src/sys/arch/amd64/include/frameasm.h:1.35 --- src/sys/arch/amd64/include/frameasm.h:1.34 Sat Jan 27 18:27:08 2018 +++ src/sys/arch/amd64/include/frameasm.h Thu Feb 22 08:56:51 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: frameasm.h,v 1.34 2018/01/27 18:27:08 maxv Exp $ */ +/* $NetBSD: frameasm.h,v 1.35 2018/02/22 08:56:51 maxv Exp $ */ #ifndef _AMD64_MACHINE_FRAMEASM_H #define _AMD64_MACHINE_FRAMEASM_H @@ -40,6 +40,10 @@ #define HP_NAME_STAC 2 #define HP_NAME_NOLOCK 3 #define HP_NAME_RETFENCE 4 +#define HP_NAME_SVS_ENTER 5 +#define HP_NAME_SVS_LEAVE 6 +#define HP_NAME_SVS_ENTER_ALT 7 +#define HP_NAME_SVS_LEAVE_ALT 8 #define HOTPATCH(name, 
size) \ 123: ; \ @@ -107,32 +111,30 @@ #define UTLS_SCRATCH 8 #define UTLS_RSP0 16 +#define SVS_ENTER_BYTES 22 #define SVS_ENTER \ - movq SVS_UTLS+UTLS_KPDIRPA,%rax ; \ - movq %rax,%cr3 ; \ - movq CPUVAR(KRSP0),%rsp + HOTPATCH(HP_NAME_SVS_ENTER, SVS_ENTER_BYTES) ; \ + .byte 0xEB, (SVS_ENTER_BYTES-2) /* jmp */ ; \ + .fill (SVS_ENTER_BYTES-2),1,0xCC +#define SVS_LEAVE_BYTES 31 #define SVS_LEAVE \ - testb $SEL_UPL,TF_CS(%rsp) ; \ - jz 1234f ; \ - movq CPUVAR(URSP0),%rsp ; \ - movq CPUVAR(UPDIRPA),%rax ; \ - movq %rax,%cr3 ; \ -1234: + HOTPATCH(HP_NAME_SVS_LEAVE, SVS_LEAVE_BYTES) ; \ + .byte 0xEB, (SVS_LEAVE_BYTES-2) /* jmp */ ; \ + .fill (SVS_LEAVE_BYTES-2),1,0xCC +#define SVS_ENTER_ALT_BYTES 23 #define SVS_ENTER_ALTSTACK \ - testb $SEL_UPL,TF_CS(%rsp) ; \ - jz 1234f ; \ - movq SVS_UTLS+UTLS_KPDIRPA,%rax ; \ - movq %rax,%cr3 ; \ -1234: + HOTPATCH(HP_NAME_SVS_ENTER_ALT, SVS_ENTER_ALT_BYTES) ; \ + .byte 0xEB, (SVS_ENTER_ALT_BYTES-2) /* jmp */ ; \ + .fill (SVS_ENTER_ALT_BYTES-2),1,0xCC +#define SVS_LEAVE_ALT_BYTES 22 #define SVS_LEAVE_ALTSTACK \ - testb $SEL_UPL,TF_CS(%rsp) ; \ - jz 1234f ; \ - movq CPUVAR(UPDIRPA),%rax ; \ - movq %rax,%cr3 ; \ -1234: + HOTPATCH(HP_NAME_SVS_LEAVE_ALT, SVS_LEAVE_ALT_BYTES) ; \ + .byte 0xEB, (SVS_LEAVE_ALT_BYTES-2) /* jmp */ ; \ + .fill (SVS_LEAVE_ALT_BYTES-2),1,0xCC + #else #define SVS_ENTER /* nothing */ #define SVS_LEAVE /* nothing */ Index: src/sys/arch/x86/x86/cpu.c diff -u src/sys/arch/x86/x86/cpu.c:1.147 src/sys/arch/x86/x86/cpu.c:1.148 --- src/sys/arch/x86/x86/cpu.c:1.147 Sat Jan 27 09:33:25 2018 +++ src/sys/arch/x86/x86/cpu.c Thu Feb 22 08:56:52 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.147 2018/01/27 09:33:25 maxv Exp $ */ +/* $NetBSD: cpu.c,v 1.148 2018/02/22 08:56:52 maxv Exp $ */ /* * Copyright (c) 2000-2012 NetBSD Foundation, Inc. 
@@ -62,7 +62,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.147 2018/01/27 09:33:25 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.148 2018/02/22 08:56:52 maxv Exp $"); #include "opt_ddb.h" #include "opt_mpbios.h" /* for MPDEBUG */ @@ -1090,7 +1090,7 @@ mp_cpu_start_cleanup(struct cpu_info *ci #ifdef __x86_64__ typedef void (vector)(void); -extern vector Xsyscall, Xsyscall32; +extern vector Xsyscall, Xsyscall32, Xsyscall_svs; #endif void @@ -1104,6 +1104,11 @@ cpu_init_msrs(struct cpu_info *ci, bool wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32); wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC); +#ifdef SVS + if (svs_enabled) + wrmsr(MSR_LSTAR, (uint64_t)Xsyscall_svs); +#endif + if (full) { wrmsr(MSR_FSBASE, 0); wrmsr(MSR_GSBASE, (uint64_t)ci); Index: src/sys/arch/x86/x86/patch.c diff -u src/sys/arch/x86/x86/patch.c:1.31 src/sys/arch/x86/x86/patch.c:1.32 --- src/sys/arch/x86/x86/patch.c:1.31 Sat Jan 27 09:33:25 2018 +++ src/sys/arch/x86/x86/patch.c Thu Feb 22 08:56:52 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: patch.c,v 1.31 2018/01/27 09:33:25 maxv Exp $ */ +/* $NetBSD: patch.c,v 1.32 2018/02/22 08:56:52 maxv Exp $ */ /*- * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. 
@@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.31 2018/01/27 09:33:25 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.32 2018/02/22 08:56:52 maxv Exp $"); #include "opt_lockdebug.h" #ifdef i386 @@ -298,6 +298,38 @@ x86_patch(bool early) x86_hotpatch(HP_NAME_STAC, stac_bytes, sizeof(stac_bytes)); } +#ifdef SVS + if (early && cpu_vendor == CPUVENDOR_INTEL) { + extern uint8_t svs_enter, svs_enter_end; + extern uint8_t svs_enter_altstack, svs_enter_altstack_end; + extern uint8_t svs_leave, svs_leave_end; + extern uint8_t svs_leave_altstack, svs_leave_altstack_end; + extern bool svs_enabled; + uint8_t *bytes; + size_t size; + + svs_enabled = true; + + bytes = &svs_enter; + size = (size_t)&svs_enter_end - (size_t)&svs_enter; + x86_hotpatch(HP_NAME_SVS_ENTER, bytes, size); + + bytes = &svs_enter_altstack; + size = (size_t)&svs_enter_altstack_end - + (size_t)&svs_enter_altstack; + x86_hotpatch(HP_NAME_SVS_ENTER_ALT, bytes, size); + + bytes = &svs_leave; + size = (size_t)&svs_leave_end - (size_t)&svs_leave; + x86_hotpatch(HP_NAME_SVS_LEAVE, bytes, size); + + bytes = &svs_leave_altstack; + size = (size_t)&svs_leave_altstack_end - + (size_t)&svs_leave_altstack; + x86_hotpatch(HP_NAME_SVS_LEAVE_ALT, bytes, size); + } +#endif + /* Write back and invalidate cache, flush pipelines. */ wbinvd(); x86_flush(); Index: src/sys/arch/x86/x86/svs.c diff -u src/sys/arch/x86/x86/svs.c:1.3 src/sys/arch/x86/x86/svs.c:1.4 --- src/sys/arch/x86/x86/svs.c:1.3 Sun Feb 18 14:07:29 2018 +++ src/sys/arch/x86/x86/svs.c Thu Feb 22 08:56:52 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: svs.c,v 1.3 2018/02/18 14:07:29 maxv Exp $ */ +/* $NetBSD: svs.c,v 1.4 2018/02/22 08:56:52 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. 
@@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: svs.c,v 1.3 2018/02/18 14:07:29 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: svs.c,v 1.4 2018/02/22 08:56:52 maxv Exp $"); #include "opt_svs.h" @@ -157,7 +157,7 @@ svs_page_add(struct cpu_info *ci, vaddr_ if (srcpde[idx] & PG_PS) { pa = srcpde[idx] & PG_2MFRAME; pa += (paddr_t)(va % NBPD_L2); - pde = (srcpde[idx] & ~(PG_PS|PG_2MFRAME)) | pa; + pde = (srcpde[idx] & ~(PG_G|PG_PS|PG_2MFRAME)) | pa; if (pmap_valid_entry(dstpde[pidx])) { panic("%s: L1 page already mapped", __func__); @@ -177,7 +177,7 @@ svs_page_add(struct cpu_info *ci, vaddr_ if (pmap_valid_entry(dstpde[pidx])) { panic("%s: L1 page already mapped", __func__); } - dstpde[pidx] = srcpde[idx]; + dstpde[pidx] = srcpde[idx] & ~(PG_G); } static void @@ -319,6 +319,7 @@ svs_pmap_sync(struct pmap *pmap, int ind struct cpu_info *ci; cpuid_t cid; + KASSERT(svs_enabled); KASSERT(pmap != NULL); KASSERT(pmap != pmap_kernel()); KASSERT(mutex_owned(pmap->pm_lock)); @@ -351,6 +352,8 @@ svs_lwp_switch(struct lwp *oldlwp, struc uintptr_t rsp0; vaddr_t va; + KASSERT(svs_enabled); + if (newlwp->l_flag & LW_SYSTEM) { return; } @@ -505,7 +508,7 @@ void svs_init(void); void svs_init(void) { - svs_enabled = true; - svs_pgg_update(false); + if (svs_enabled) + svs_pgg_update(false); } Index: src/sys/arch/x86/x86/x86_machdep.c diff -u src/sys/arch/x86/x86/x86_machdep.c:1.103 src/sys/arch/x86/x86/x86_machdep.c:1.104 --- src/sys/arch/x86/x86/x86_machdep.c:1.103 Sat Feb 17 17:44:09 2018 +++ src/sys/arch/x86/x86/x86_machdep.c Thu Feb 22 08:56:52 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: x86_machdep.c,v 1.103 2018/02/17 17:44:09 maxv Exp $ */ +/* $NetBSD: x86_machdep.c,v 1.104 2018/02/22 08:56:52 maxv Exp $ */ /*- * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi, @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.103 2018/02/17 17:44:09 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.104 2018/02/22 08:56:52 maxv Exp 
$"); #include "opt_modular.h" #include "opt_physmem.h" @@ -1259,6 +1259,14 @@ SYSCTL_SETUP(sysctl_machdep_setup, "sysc sysctl_machdep_tsc_enable, 0, &tsc_user_enabled, 0, CTL_MACHDEP, CTL_CREATE, CTL_EOL); #endif +#ifdef SVS + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_BOOL, "svs_enabled", + SYSCTL_DESCR("Whether the kernel uses SVS"), + NULL, 0, &svs_enabled, 0, + CTL_MACHDEP, CTL_CREATE, CTL_EOL); +#endif /* None of these can ever change once the system has booted */ const_sysctl(clog, "fpu_present", CTLTYPE_INT, i386_fpu_present,