Module Name:    src
Committed By:   maxv
Date:           Thu Feb 22 08:56:52 UTC 2018

Modified Files:
        src/sys/arch/amd64/amd64: locore.S
        src/sys/arch/amd64/include: frameasm.h
        src/sys/arch/x86/x86: cpu.c patch.c svs.c x86_machdep.c

Log Message:
Add a dynamic detection for SVS.

The SVS_* macros are now compiled as skip-noopt. When the system boots, if
the cpu is from Intel, they are hotpatched to their real content.
Typically:

                jmp     1f
                int3
                int3
                int3
                ... int3 ...
        1:

gets hotpatched to:

                movq    SVS_UTLS+UTLS_KPDIRPA,%rax
                movq    %rax,%cr3
                movq    CPUVAR(KRSP0),%rsp

These two chunks of code are of the exact same size. We fill the unpatched
area with int3 (0xCC) to make sure we never execute there.

In the non-SVS (i.e., non-Intel) case, all it costs is one jump. Given that
the SVS_* macros are small, this jump will likely leave us in the same
icache line, so it's pretty fast.

The syscall entry point is special, because there we use a scratch uint64_t
not in curcpu but in the UTLS page, and it's difficult to hotpatch this
properly. So instead of hotpatching we declare the entry point as an ASM
macro, and define two functions: syscall and syscall_svs, the latter being
the one used in the SVS case.

While here, 'syscall' is optimized not to contain an SVS_ENTER - this way
we don't even need to do a jump in the non-SVS case.

When adding pages in the user page tables, make sure we don't have PG_G,
now that it's dynamic.

A read-only sysctl is added, machdep.svs_enabled, that tells whether the
kernel uses SVS or not.

More changes to come, svs_init() is not very clean.


To generate a diff of this commit:
cvs rdiff -u -r1.151 -r1.152 src/sys/arch/amd64/amd64/locore.S
cvs rdiff -u -r1.34 -r1.35 src/sys/arch/amd64/include/frameasm.h
cvs rdiff -u -r1.147 -r1.148 src/sys/arch/x86/x86/cpu.c
cvs rdiff -u -r1.31 -r1.32 src/sys/arch/x86/x86/patch.c
cvs rdiff -u -r1.3 -r1.4 src/sys/arch/x86/x86/svs.c
cvs rdiff -u -r1.103 -r1.104 src/sys/arch/x86/x86/x86_machdep.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/locore.S
diff -u src/sys/arch/amd64/amd64/locore.S:1.151 src/sys/arch/amd64/amd64/locore.S:1.152
--- src/sys/arch/amd64/amd64/locore.S:1.151	Sun Feb 18 14:07:29 2018
+++ src/sys/arch/amd64/amd64/locore.S	Thu Feb 22 08:56:51 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.151 2018/02/18 14:07:29 maxv Exp $	*/
+/*	$NetBSD: locore.S,v 1.152 2018/02/22 08:56:51 maxv Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -1117,17 +1117,27 @@ ENTRY(cpu_switchto)
 
 	/* Switch ring0 stack */
 #ifdef SVS
+	movb	_C_LABEL(svs_enabled),%al
+	testb	%al,%al
+	jz	.Lno_svs_switch
+
 	movq	CPUVAR(RSP0),%rax
 	movq	CPUVAR(TSS),%rdi
 	movq	%rax,TSS_RSP0(%rdi)
-#elif !defined(XEN)
+	jmp	.Lring0_switched
+
+.Lno_svs_switch:
+#endif
+
+#if !defined(XEN)
 	movq	PCB_RSP0(%r14),%rax
 	movq	CPUVAR(TSS),%rdi
 	movq	%rax,TSS_RSP0(%rdi)
 #else
 	movq	%r14,%rdi
-	callq	_C_LABEL(x86_64_switch_context);
+	callq	_C_LABEL(x86_64_switch_context)
 #endif
+.Lring0_switched:
 
 	/* Don't bother with the rest if switching to a system process. */
 	testl	$LW_SYSTEM,L_FLAG(%r12)
@@ -1347,9 +1357,10 @@ END(lwp_trampoline)
 /*
  * Entry points of the 'syscall' instruction, 64bit and 32bit mode.
  */
-	TEXT_USER_BEGIN
 
-IDTVEC(syscall)
+
+.macro	SYSCALL_ENTRY	name,is_svs
+IDTVEC(\name)
 #ifndef XEN
 	/*
 	 * The user %rip is in %rcx and the user %rflags in %r11. The kernel %cs
@@ -1365,31 +1376,39 @@ IDTVEC(syscall)
 	 */
 	swapgs
 
-#ifdef SVS
-	movq	%rax,SVS_UTLS+UTLS_SCRATCH
-	movq	SVS_UTLS+UTLS_RSP0,%rax
-#define SP(x)	(x)-(TF_SS+8)(%rax)
-#else
-	movq	%r15,CPUVAR(SCRATCH)
-	movq	CPUVAR(CURLWP),%r15
-	movq	L_PCB(%r15),%r15
-	movq	PCB_RSP0(%r15),%r15	/* LWP's kernel stack pointer */
-#define SP(x)	(x)-(TF_SS+8)(%r15)
-#endif
+#define SP(x,reg)	(x)-(TF_SS+8)(reg)
 
-	/* Make stack look like an 'int nn' frame */
-	movq	$(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS)	/* user %ss */
-	movq	%rsp,SP(TF_RSP)				/* user %rsp */
-	movq	%r11,SP(TF_RFLAGS)			/* user %rflags */
-	movq	$(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS)	/* user %cs */
-	movq	%rcx,SP(TF_RIP)				/* user %rip */
+	.if	\is_svs
+		movq	%rax,SVS_UTLS+UTLS_SCRATCH
+		movq	SVS_UTLS+UTLS_RSP0,%rax
+
+		/* Make stack look like an 'int nn' frame */
+		movq	$(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS,%rax)	/* user %ss */
+		movq	%rsp,SP(TF_RSP,%rax)				/* user %rsp */
+		movq	%r11,SP(TF_RFLAGS,%rax)				/* user %rflags */
+		movq	$(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS,%rax)	/* user %cs */
+		movq	%rcx,SP(TF_RIP,%rax)				/* user %rip */
+		leaq	SP(0,%rax),%rsp			/* %rsp now valid after frame */
+
+		movq	SVS_UTLS+UTLS_SCRATCH,%rax
+	.else
+		movq	%r15,CPUVAR(SCRATCH)
+		movq	CPUVAR(CURLWP),%r15
+		movq	L_PCB(%r15),%r15
+		movq	PCB_RSP0(%r15),%r15	/* LWP's kernel stack pointer */
+
+		/* Make stack look like an 'int nn' frame */
+		movq	$(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS,%r15)	/* user %ss */
+		movq	%rsp,SP(TF_RSP,%r15)				/* user %rsp */
+		movq	%r11,SP(TF_RFLAGS,%r15)				/* user %rflags */
+		movq	$(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS,%r15)	/* user %cs */
+		movq	%rcx,SP(TF_RIP,%r15)				/* user %rip */
+		leaq	SP(0,%r15),%rsp			/* %rsp now valid after frame */
 
-	leaq	SP(0),%rsp		/* %rsp now valid after frame */
-#ifdef SVS
-	movq	SVS_UTLS+UTLS_SCRATCH,%rax
-#else
-	movq	CPUVAR(SCRATCH),%r15
-#endif
+		movq	CPUVAR(SCRATCH),%r15
+	.endif
+
+#undef SP
 
 	movq	$2,TF_ERR(%rsp)		/* syscall instruction size */
 	movq	$T_ASTFLT,TF_TRAPNO(%rsp)
@@ -1406,9 +1425,18 @@ IDTVEC(syscall)
 	movw	$GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
 	movw	$0,TF_FS(%rsp)
 	movw	$0,TF_GS(%rsp)
-	SVS_ENTER
+	.if	\is_svs
+		SVS_ENTER
+	.endif
 	jmp	handle_syscall
-IDTVEC_END(syscall)
+IDTVEC_END(\name)
+.endm
+
+SYSCALL_ENTRY	syscall,is_svs=0
+
+	TEXT_USER_BEGIN
+
+SYSCALL_ENTRY	syscall_svs,is_svs=1
 
 IDTVEC(syscall32)
 	sysret		/* go away please */
@@ -1559,3 +1587,47 @@ do_mov_gs:
 do_iret:
 	iretq
 END(intrfastexit)
+
+	TEXT_USER_END
+
+#ifdef SVS
+	.globl	svs_enter
+	.globl	svs_enter_end
+	.globl	svs_enter_altstack
+	.globl	svs_enter_altstack_end
+	.globl	svs_leave
+	.globl	svs_leave_end
+	.globl	svs_leave_altstack
+	.globl	svs_leave_altstack_end
+
+LABEL(svs_enter)
+	movq	SVS_UTLS+UTLS_KPDIRPA,%rax
+	movq	%rax,%cr3
+	movq	CPUVAR(KRSP0),%rsp
+LABEL(svs_enter_end)
+
+LABEL(svs_enter_altstack)
+	testb	$SEL_UPL,TF_CS(%rsp)
+	jz	1234f
+	movq	SVS_UTLS+UTLS_KPDIRPA,%rax
+	movq	%rax,%cr3
+1234:
+LABEL(svs_enter_altstack_end)
+
+LABEL(svs_leave)
+	testb	$SEL_UPL,TF_CS(%rsp)
+	jz	1234f
+	movq	CPUVAR(URSP0),%rsp
+	movq	CPUVAR(UPDIRPA),%rax
+	movq	%rax,%cr3
+1234:
+LABEL(svs_leave_end)
+
+LABEL(svs_leave_altstack)
+	testb	$SEL_UPL,TF_CS(%rsp)
+	jz	1234f
+	movq	CPUVAR(UPDIRPA),%rax
+	movq	%rax,%cr3
+1234:
+LABEL(svs_leave_altstack_end)
+#endif

Index: src/sys/arch/amd64/include/frameasm.h
diff -u src/sys/arch/amd64/include/frameasm.h:1.34 src/sys/arch/amd64/include/frameasm.h:1.35
--- src/sys/arch/amd64/include/frameasm.h:1.34	Sat Jan 27 18:27:08 2018
+++ src/sys/arch/amd64/include/frameasm.h	Thu Feb 22 08:56:51 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: frameasm.h,v 1.34 2018/01/27 18:27:08 maxv Exp $	*/
+/*	$NetBSD: frameasm.h,v 1.35 2018/02/22 08:56:51 maxv Exp $	*/
 
 #ifndef _AMD64_MACHINE_FRAMEASM_H
 #define _AMD64_MACHINE_FRAMEASM_H
@@ -40,6 +40,10 @@
 #define HP_NAME_STAC		2
 #define HP_NAME_NOLOCK		3
 #define HP_NAME_RETFENCE	4
+#define HP_NAME_SVS_ENTER	5
+#define HP_NAME_SVS_LEAVE	6
+#define HP_NAME_SVS_ENTER_ALT	7
+#define HP_NAME_SVS_LEAVE_ALT	8
 
 #define HOTPATCH(name, size) \
 123:						; \
@@ -107,32 +111,30 @@
 #define UTLS_SCRATCH		8
 #define UTLS_RSP0		16
 
+#define SVS_ENTER_BYTES	22
 #define SVS_ENTER \
-	movq	SVS_UTLS+UTLS_KPDIRPA,%rax	; \
-	movq	%rax,%cr3			; \
-	movq	CPUVAR(KRSP0),%rsp
+	HOTPATCH(HP_NAME_SVS_ENTER, SVS_ENTER_BYTES)	; \
+	.byte 0xEB, (SVS_ENTER_BYTES-2)	/* jmp */	; \
+	.fill	(SVS_ENTER_BYTES-2),1,0xCC
 
+#define SVS_LEAVE_BYTES	31
 #define SVS_LEAVE \
-	testb	$SEL_UPL,TF_CS(%rsp)		; \
-	jz	1234f				; \
-	movq	CPUVAR(URSP0),%rsp		; \
-	movq	CPUVAR(UPDIRPA),%rax		; \
-	movq	%rax,%cr3			; \
-1234:
+	HOTPATCH(HP_NAME_SVS_LEAVE, SVS_LEAVE_BYTES)	; \
+	.byte 0xEB, (SVS_LEAVE_BYTES-2)	/* jmp */	; \
+	.fill	(SVS_LEAVE_BYTES-2),1,0xCC
 
+#define SVS_ENTER_ALT_BYTES	23
 #define SVS_ENTER_ALTSTACK \
-	testb	$SEL_UPL,TF_CS(%rsp)		; \
-	jz	1234f				; \
-	movq	SVS_UTLS+UTLS_KPDIRPA,%rax	; \
-	movq	%rax,%cr3			; \
-1234:
+	HOTPATCH(HP_NAME_SVS_ENTER_ALT, SVS_ENTER_ALT_BYTES)	; \
+	.byte 0xEB, (SVS_ENTER_ALT_BYTES-2)	/* jmp */	; \
+	.fill	(SVS_ENTER_ALT_BYTES-2),1,0xCC
 
+#define SVS_LEAVE_ALT_BYTES	22
 #define SVS_LEAVE_ALTSTACK \
-	testb	$SEL_UPL,TF_CS(%rsp)		; \
-	jz	1234f				; \
-	movq	CPUVAR(UPDIRPA),%rax		; \
-	movq	%rax,%cr3			; \
-1234:
+	HOTPATCH(HP_NAME_SVS_LEAVE_ALT, SVS_LEAVE_ALT_BYTES)	; \
+	.byte 0xEB, (SVS_LEAVE_ALT_BYTES-2)	/* jmp */	; \
+	.fill	(SVS_LEAVE_ALT_BYTES-2),1,0xCC
+
 #else
 #define SVS_ENTER	/* nothing */
 #define SVS_LEAVE	/* nothing */

Index: src/sys/arch/x86/x86/cpu.c
diff -u src/sys/arch/x86/x86/cpu.c:1.147 src/sys/arch/x86/x86/cpu.c:1.148
--- src/sys/arch/x86/x86/cpu.c:1.147	Sat Jan 27 09:33:25 2018
+++ src/sys/arch/x86/x86/cpu.c	Thu Feb 22 08:56:52 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.147 2018/01/27 09:33:25 maxv Exp $	*/
+/*	$NetBSD: cpu.c,v 1.148 2018/02/22 08:56:52 maxv Exp $	*/
 
 /*
  * Copyright (c) 2000-2012 NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.147 2018/01/27 09:33:25 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.148 2018/02/22 08:56:52 maxv Exp $");
 
 #include "opt_ddb.h"
 #include "opt_mpbios.h"		/* for MPDEBUG */
@@ -1090,7 +1090,7 @@ mp_cpu_start_cleanup(struct cpu_info *ci
 
 #ifdef __x86_64__
 typedef void (vector)(void);
-extern vector Xsyscall, Xsyscall32;
+extern vector Xsyscall, Xsyscall32, Xsyscall_svs;
 #endif
 
 void
@@ -1104,6 +1104,11 @@ cpu_init_msrs(struct cpu_info *ci, bool 
 	wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
 	wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC);
 
+#ifdef SVS
+	if (svs_enabled)
+		wrmsr(MSR_LSTAR, (uint64_t)Xsyscall_svs);
+#endif
+
 	if (full) {
 		wrmsr(MSR_FSBASE, 0);
 		wrmsr(MSR_GSBASE, (uint64_t)ci);

Index: src/sys/arch/x86/x86/patch.c
diff -u src/sys/arch/x86/x86/patch.c:1.31 src/sys/arch/x86/x86/patch.c:1.32
--- src/sys/arch/x86/x86/patch.c:1.31	Sat Jan 27 09:33:25 2018
+++ src/sys/arch/x86/x86/patch.c	Thu Feb 22 08:56:52 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: patch.c,v 1.31 2018/01/27 09:33:25 maxv Exp $	*/
+/*	$NetBSD: patch.c,v 1.32 2018/02/22 08:56:52 maxv Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.31 2018/01/27 09:33:25 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: patch.c,v 1.32 2018/02/22 08:56:52 maxv Exp $");
 
 #include "opt_lockdebug.h"
 #ifdef i386
@@ -298,6 +298,38 @@ x86_patch(bool early)
 		x86_hotpatch(HP_NAME_STAC, stac_bytes, sizeof(stac_bytes));
 	}
 
+#ifdef SVS
+	if (early && cpu_vendor == CPUVENDOR_INTEL) {
+		extern uint8_t svs_enter, svs_enter_end;
+		extern uint8_t svs_enter_altstack, svs_enter_altstack_end;
+		extern uint8_t svs_leave, svs_leave_end;
+		extern uint8_t svs_leave_altstack, svs_leave_altstack_end;
+		extern bool svs_enabled;
+		uint8_t *bytes;
+		size_t size;
+
+		svs_enabled = true;
+
+		bytes = &svs_enter;
+		size = (size_t)&svs_enter_end - (size_t)&svs_enter;
+		x86_hotpatch(HP_NAME_SVS_ENTER, bytes, size);
+
+		bytes = &svs_enter_altstack;
+		size = (size_t)&svs_enter_altstack_end -
+		    (size_t)&svs_enter_altstack;
+		x86_hotpatch(HP_NAME_SVS_ENTER_ALT, bytes, size);
+
+		bytes = &svs_leave;
+		size = (size_t)&svs_leave_end - (size_t)&svs_leave;
+		x86_hotpatch(HP_NAME_SVS_LEAVE, bytes, size);
+
+		bytes = &svs_leave_altstack;
+		size = (size_t)&svs_leave_altstack_end -
+		    (size_t)&svs_leave_altstack;
+		x86_hotpatch(HP_NAME_SVS_LEAVE_ALT, bytes, size);
+	}
+#endif
+
 	/* Write back and invalidate cache, flush pipelines. */
 	wbinvd();
 	x86_flush();

Index: src/sys/arch/x86/x86/svs.c
diff -u src/sys/arch/x86/x86/svs.c:1.3 src/sys/arch/x86/x86/svs.c:1.4
--- src/sys/arch/x86/x86/svs.c:1.3	Sun Feb 18 14:07:29 2018
+++ src/sys/arch/x86/x86/svs.c	Thu Feb 22 08:56:52 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: svs.c,v 1.3 2018/02/18 14:07:29 maxv Exp $	*/
+/*	$NetBSD: svs.c,v 1.4 2018/02/22 08:56:52 maxv Exp $	*/
 
 /*
  * Copyright (c) 2018 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: svs.c,v 1.3 2018/02/18 14:07:29 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: svs.c,v 1.4 2018/02/22 08:56:52 maxv Exp $");
 
 #include "opt_svs.h"
 
@@ -157,7 +157,7 @@ svs_page_add(struct cpu_info *ci, vaddr_
 	if (srcpde[idx] & PG_PS) {
 		pa = srcpde[idx] & PG_2MFRAME;
 		pa += (paddr_t)(va % NBPD_L2);
-		pde = (srcpde[idx] & ~(PG_PS|PG_2MFRAME)) | pa;
+		pde = (srcpde[idx] & ~(PG_G|PG_PS|PG_2MFRAME)) | pa;
 
 		if (pmap_valid_entry(dstpde[pidx])) {
 			panic("%s: L1 page already mapped", __func__);
@@ -177,7 +177,7 @@ svs_page_add(struct cpu_info *ci, vaddr_
 	if (pmap_valid_entry(dstpde[pidx])) {
 		panic("%s: L1 page already mapped", __func__);
 	}
-	dstpde[pidx] = srcpde[idx];
+	dstpde[pidx] = srcpde[idx] & ~(PG_G);
 }
 
 static void
@@ -319,6 +319,7 @@ svs_pmap_sync(struct pmap *pmap, int ind
 	struct cpu_info *ci;
 	cpuid_t cid;
 
+	KASSERT(svs_enabled);
 	KASSERT(pmap != NULL);
 	KASSERT(pmap != pmap_kernel());
 	KASSERT(mutex_owned(pmap->pm_lock));
@@ -351,6 +352,8 @@ svs_lwp_switch(struct lwp *oldlwp, struc
 	uintptr_t rsp0;
 	vaddr_t va;
 
+	KASSERT(svs_enabled);
+
 	if (newlwp->l_flag & LW_SYSTEM) {
 		return;
 	}
@@ -505,7 +508,7 @@ void svs_init(void);
 void
 svs_init(void)
 {
-	svs_enabled = true;
-	svs_pgg_update(false);
+	if (svs_enabled)
+		svs_pgg_update(false);
 }
 

Index: src/sys/arch/x86/x86/x86_machdep.c
diff -u src/sys/arch/x86/x86/x86_machdep.c:1.103 src/sys/arch/x86/x86/x86_machdep.c:1.104
--- src/sys/arch/x86/x86/x86_machdep.c:1.103	Sat Feb 17 17:44:09 2018
+++ src/sys/arch/x86/x86/x86_machdep.c	Thu Feb 22 08:56:52 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: x86_machdep.c,v 1.103 2018/02/17 17:44:09 maxv Exp $	*/
+/*	$NetBSD: x86_machdep.c,v 1.104 2018/02/22 08:56:52 maxv Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.103 2018/02/17 17:44:09 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.104 2018/02/22 08:56:52 maxv Exp $");
 
 #include "opt_modular.h"
 #include "opt_physmem.h"
@@ -1259,6 +1259,14 @@ SYSCTL_SETUP(sysctl_machdep_setup, "sysc
 		       sysctl_machdep_tsc_enable, 0, &tsc_user_enabled, 0,
 		       CTL_MACHDEP, CTL_CREATE, CTL_EOL);
 #endif
+#ifdef SVS
+	sysctl_createv(clog, 0, NULL, NULL,
+		       CTLFLAG_PERMANENT,
+		       CTLTYPE_BOOL, "svs_enabled",
+		       SYSCTL_DESCR("Whether the kernel uses SVS"),
+		       NULL, 0, &svs_enabled, 0,
+		       CTL_MACHDEP, CTL_CREATE, CTL_EOL);
+#endif
 
 	/* None of these can ever change once the system has booted */
 	const_sysctl(clog, "fpu_present", CTLTYPE_INT, i386_fpu_present,

Reply via email to