Module Name:    src
Committed By:   martin
Date:           Fri Apr  5 07:48:05 UTC 2019

Modified Files:
        src/sys/arch/amd64/amd64 [netbsd-8]: machdep.c netbsd32_machdep.c
        src/sys/compat/linux/arch/amd64 [netbsd-8]: linux_machdep.c
        src/sys/compat/linux32/arch/amd64 [netbsd-8]: linux32_machdep.c

Log Message:
Pull up following revision(s) (requested by maxv):

        sys/arch/amd64/amd64/netbsd32_machdep.c: revision 1.120
        sys/compat/linux/arch/amd64/linux_machdep.c: revision 1.57
        sys/compat/linux32/arch/amd64/linux32_machdep.c: revision 1.44
        sys/arch/amd64/amd64/machdep.c: revision 1.328
        sys/arch/amd64/amd64/machdep.c: revision 1.329

Fix a tiny race in setregs and linux_setregs. Between the moment we set
pcb_flags to zero and the moment cpu_segregs64_zero (cpu_fsgs_zero in the
netbsd-8 diff below) resets pcb_gs, we may be preempted.

If this happens, and if the calling LWP was a 32-bit thread, when switching
back to that LWP, the context switcher sees that PCB_COMPAT32 is not set in
pcb_flags and tries to perform a 64-bit context switch; but pcb_gs contains
a 32-bit GDT descriptor, and not a 64-bit GS.base value. The wrmsr therefore
faults because the value is non-canonical, and this fault is fatal.

Rearrange the code so that the update of pcb_flags and pcb_gs/pcb_fs is
non-interruptible. This fixes the problem; tested with a reproducer, which
no longer triggers the fault.

Likely fixes PR/53993.

Disable preemption when setting PCB_COMPAT32, to prevent a context switch
before cpu_fsgs_reload() finishes; otherwise we write garbage into the GDT.

On NetBSD-current such a context switch is harmless; in NetBSD-8, however,
it might cause panics, because NetBSD-8 uses the old SegRegs model and under
this model we reload %fs and %gs during switches.


To generate a diff of this commit:
cvs rdiff -u -r1.255.6.8 -r1.255.6.9 src/sys/arch/amd64/amd64/machdep.c
cvs rdiff -u -r1.105.2.2 -r1.105.2.3 \
    src/sys/arch/amd64/amd64/netbsd32_machdep.c
cvs rdiff -u -r1.51.6.1 -r1.51.6.2 \
    src/sys/compat/linux/arch/amd64/linux_machdep.c
cvs rdiff -u -r1.38.6.1 -r1.38.6.2 \
    src/sys/compat/linux32/arch/amd64/linux32_machdep.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/amd64/machdep.c
diff -u src/sys/arch/amd64/amd64/machdep.c:1.255.6.8 src/sys/arch/amd64/amd64/machdep.c:1.255.6.9
--- src/sys/arch/amd64/amd64/machdep.c:1.255.6.8	Thu Nov 29 08:51:01 2018
+++ src/sys/arch/amd64/amd64/machdep.c	Fri Apr  5 07:48:05 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.255.6.8 2018/11/29 08:51:01 martin Exp $	*/
+/*	$NetBSD: machdep.c,v 1.255.6.9 2019/04/05 07:48:05 martin Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -111,7 +111,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.255.6.8 2018/11/29 08:51:01 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.255.6.9 2019/04/05 07:48:05 martin Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -1350,20 +1350,22 @@ setregs(struct lwp *l, struct exec_packa
 
 	fpu_save_area_clear(l, pack->ep_osversion >= 699002600
 	    ? __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
-	pcb->pcb_flags = 0;
+
 	if (pcb->pcb_dbregs != NULL) {
 		pool_put(&x86_dbregspl, pcb->pcb_dbregs);
 		pcb->pcb_dbregs = NULL;
 	}
 
+	kpreempt_disable();
+	pcb->pcb_flags = 0;
 	l->l_proc->p_flag &= ~PK_32;
-
 	l->l_md.md_flags = MDL_IRET;
+	cpu_fsgs_zero(l);
+	kpreempt_enable();
 
 	tf = l->l_md.md_regs;
 	tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
 	tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
-	cpu_fsgs_zero(l);
 	tf->tf_rdi = 0;
 	tf->tf_rsi = 0;
 	tf->tf_rbp = 0;

Index: src/sys/arch/amd64/amd64/netbsd32_machdep.c
diff -u src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.105.2.2 src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.105.2.3
--- src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.105.2.2	Sun Jan 27 18:43:08 2019
+++ src/sys/arch/amd64/amd64/netbsd32_machdep.c	Fri Apr  5 07:48:05 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: netbsd32_machdep.c,v 1.105.2.2 2019/01/27 18:43:08 martin Exp $	*/
+/*	$NetBSD: netbsd32_machdep.c,v 1.105.2.3 2019/04/05 07:48:05 martin Exp $	*/
 
 /*
  * Copyright (c) 2001 Wasabi Systems, Inc.
@@ -36,7 +36,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.105.2.2 2019/01/27 18:43:08 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.105.2.3 2019/04/05 07:48:05 martin Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_compat_netbsd.h"
@@ -132,9 +132,6 @@ netbsd32_setregs(struct lwp *l, struct e
 
 	netbsd32_adjust_limits(p);
 
-	l->l_md.md_flags |= MDL_COMPAT32;	/* Force iret not sysret */
-	pcb->pcb_flags = PCB_COMPAT32;
-
 	fpu_save_area_clear(l, pack->ep_osversion >= 699002600
 	    ?  __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
 
@@ -143,13 +140,18 @@ netbsd32_setregs(struct lwp *l, struct e
 		pcb->pcb_dbregs = NULL;
 	}
 
+	kpreempt_disable();
+	pcb->pcb_flags = PCB_COMPAT32;
 	p->p_flag |= PK_32;
+	l->l_md.md_flags = MDL_COMPAT32;	/* force iret not sysret */
+	cpu_fsgs_zero(l);
+	cpu_fsgs_reload(l, LSEL(LUDATA32_SEL, SEL_UPL),
+	    LSEL(LUDATA32_SEL, SEL_UPL));
+	kpreempt_enable();
 
 	tf = l->l_md.md_regs;
 	tf->tf_ds = LSEL(LUDATA32_SEL, SEL_UPL);
 	tf->tf_es = LSEL(LUDATA32_SEL, SEL_UPL);
-	cpu_fsgs_zero(l);
-	cpu_fsgs_reload(l, tf->tf_ds, tf->tf_es);
 	tf->tf_rdi = 0;
 	tf->tf_rsi = 0;
 	tf->tf_rbp = 0;

Index: src/sys/compat/linux/arch/amd64/linux_machdep.c
diff -u src/sys/compat/linux/arch/amd64/linux_machdep.c:1.51.6.1 src/sys/compat/linux/arch/amd64/linux_machdep.c:1.51.6.2
--- src/sys/compat/linux/arch/amd64/linux_machdep.c:1.51.6.1	Mon Sep  4 20:41:28 2017
+++ src/sys/compat/linux/arch/amd64/linux_machdep.c	Fri Apr  5 07:48:05 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_machdep.c,v 1.51.6.1 2017/09/04 20:41:28 snj Exp $ */
+/*	$NetBSD: linux_machdep.c,v 1.51.6.2 2019/04/05 07:48:05 martin Exp $ */
 
 /*-
  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
@@ -33,7 +33,7 @@
 
 #include <sys/cdefs.h>
 
-__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.51.6.1 2017/09/04 20:41:28 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.51.6.2 2019/04/05 07:48:05 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -89,11 +89,13 @@ linux_setregs(struct lwp *l, struct exec
 #endif
 
 	fpu_save_area_clear(l, __NetBSD_NPXCW__);
-	pcb->pcb_flags = 0;
 
+	kpreempt_disable();
+	pcb->pcb_flags = 0;
 	l->l_proc->p_flag &= ~PK_32;
-
 	l->l_md.md_flags = MDL_IRET;
+	cpu_fsgs_zero(l);
+	kpreempt_enable();
 
 	tf = l->l_md.md_regs;
 	tf->tf_rax = 0;
@@ -118,7 +120,6 @@ linux_setregs(struct lwp *l, struct exec
 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
 	tf->tf_es = 0;
-	cpu_fsgs_zero(l);
 
 	return;
 }

Index: src/sys/compat/linux32/arch/amd64/linux32_machdep.c
diff -u src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.38.6.1 src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.38.6.2
--- src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.38.6.1	Sat Sep  9 17:01:23 2017
+++ src/sys/compat/linux32/arch/amd64/linux32_machdep.c	Fri Apr  5 07:48:05 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux32_machdep.c,v 1.38.6.1 2017/09/09 17:01:23 snj Exp $ */
+/*	$NetBSD: linux32_machdep.c,v 1.38.6.2 2019/04/05 07:48:05 martin Exp $ */
 
 /*-
  * Copyright (c) 2006 Emmanuel Dreyfus, all rights reserved.
@@ -31,7 +31,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.38.6.1 2017/09/09 17:01:23 snj Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.38.6.2 2019/04/05 07:48:05 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -283,10 +283,14 @@ linux32_setregs(struct lwp *l, struct ex
 
 	fpu_save_area_clear(l, __Linux_NPXCW__);
 
-	l->l_md.md_flags |= MDL_COMPAT32;	/* Forces iret not sysret */
+	kpreempt_disable();
 	pcb->pcb_flags = PCB_COMPAT32;
-
 	p->p_flag |= PK_32;
+	l->l_md.md_flags = MDL_COMPAT32;	/* force iret not sysret */
+	cpu_fsgs_zero(l);
+	cpu_fsgs_reload(l, GSEL(GUDATA32_SEL, SEL_UPL),
+	    GSEL(GUDATA32_SEL, SEL_UPL));
+	kpreempt_enable();
 
 	tf = l->l_md.md_regs;
 	tf->tf_rax = 0;
@@ -311,8 +315,6 @@ linux32_setregs(struct lwp *l, struct ex
 	tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL);
 	tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL);
 	tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL);
-	cpu_fsgs_zero(l);
-	cpu_fsgs_reload(l, GSEL(GUDATA32_SEL, SEL_UPL), GSEL(GUDATA32_SEL, SEL_UPL));
 }
 
 static void

Reply via email to