Module Name: src Committed By: martin Date: Fri Apr 5 07:48:05 UTC 2019
Modified Files: src/sys/arch/amd64/amd64 [netbsd-8]: machdep.c netbsd32_machdep.c src/sys/compat/linux/arch/amd64 [netbsd-8]: linux_machdep.c src/sys/compat/linux32/arch/amd64 [netbsd-8]: linux32_machdep.c Log Message: Pull up following revision(s) (requested by maxv): sys/arch/amd64/amd64/netbsd32_machdep.c: revision 1.120 sys/compat/linux/arch/amd64/linux_machdep.c: revision 1.57 sys/compat/linux32/arch/amd64/linux32_machdep.c: revision 1.44 sys/arch/amd64/amd64/machdep.c: revision 1.328 sys/arch/amd64/amd64/machdep.c: revision 1.329 Fix a tiny race in setregs and linux_setregs. Between the moment we set pcb_flags to zero, and the moment cpu_segregs64_zero resets pcb_gs, we may be preempted. If this happens, and if the calling LWP was a 32-bit thread, when switching back to that LWP, the context switcher sees that PCB_COMPAT32 is not set in pcb_flags and tries to perform a 64-bit context switch; but pcb_gs contains a 32-bit GDT descriptor, and not a 64-bit GS.base value. The wrmsr therefore faults because the value is non-canonical, and this fault is fatal. Rearrange the code so that the update of pcb_flags and pcb_gs/pcb_fs is non-interruptible. This fixes the problem, tested with a reproducer (which therefore no longer triggers the fault). Likely fixes PR/53993. Disable preemption when setting PCB_COMPAT32, to prevent a context switch before cpu_fsgs_reload() finishes; otherwise we write garbage into the GDT. On NetBSD-current it is harmless; however, in NetBSD-8 it might cause panics, because NetBSD-8 uses the old SegRegs model and under this model we reload %fs and %gs during switches. 
To generate a diff of this commit: cvs rdiff -u -r1.255.6.8 -r1.255.6.9 src/sys/arch/amd64/amd64/machdep.c cvs rdiff -u -r1.105.2.2 -r1.105.2.3 \ src/sys/arch/amd64/amd64/netbsd32_machdep.c cvs rdiff -u -r1.51.6.1 -r1.51.6.2 \ src/sys/compat/linux/arch/amd64/linux_machdep.c cvs rdiff -u -r1.38.6.1 -r1.38.6.2 \ src/sys/compat/linux32/arch/amd64/linux32_machdep.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/amd64/amd64/machdep.c diff -u src/sys/arch/amd64/amd64/machdep.c:1.255.6.8 src/sys/arch/amd64/amd64/machdep.c:1.255.6.9 --- src/sys/arch/amd64/amd64/machdep.c:1.255.6.8 Thu Nov 29 08:51:01 2018 +++ src/sys/arch/amd64/amd64/machdep.c Fri Apr 5 07:48:05 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: machdep.c,v 1.255.6.8 2018/11/29 08:51:01 martin Exp $ */ +/* $NetBSD: machdep.c,v 1.255.6.9 2019/04/05 07:48:05 martin Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011 @@ -111,7 +111,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.255.6.8 2018/11/29 08:51:01 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.255.6.9 2019/04/05 07:48:05 martin Exp $"); /* #define XENDEBUG_LOW */ @@ -1350,20 +1350,22 @@ setregs(struct lwp *l, struct exec_packa fpu_save_area_clear(l, pack->ep_osversion >= 699002600 ? __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__); - pcb->pcb_flags = 0; + if (pcb->pcb_dbregs != NULL) { pool_put(&x86_dbregspl, pcb->pcb_dbregs); pcb->pcb_dbregs = NULL; } + kpreempt_disable(); + pcb->pcb_flags = 0; l->l_proc->p_flag &= ~PK_32; - l->l_md.md_flags = MDL_IRET; + cpu_fsgs_zero(l); + kpreempt_enable(); tf = l->l_md.md_regs; tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL); tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL); - cpu_fsgs_zero(l); tf->tf_rdi = 0; tf->tf_rsi = 0; tf->tf_rbp = 0; Index: src/sys/arch/amd64/amd64/netbsd32_machdep.c diff -u src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.105.2.2 src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.105.2.3 --- src/sys/arch/amd64/amd64/netbsd32_machdep.c:1.105.2.2 Sun Jan 27 18:43:08 2019 +++ src/sys/arch/amd64/amd64/netbsd32_machdep.c Fri Apr 5 07:48:05 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: netbsd32_machdep.c,v 1.105.2.2 2019/01/27 18:43:08 martin Exp $ */ +/* $NetBSD: netbsd32_machdep.c,v 1.105.2.3 2019/04/05 07:48:05 martin Exp $ */ /* * Copyright (c) 2001 Wasabi Systems, Inc. 
@@ -36,7 +36,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.105.2.2 2019/01/27 18:43:08 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: netbsd32_machdep.c,v 1.105.2.3 2019/04/05 07:48:05 martin Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" @@ -132,9 +132,6 @@ netbsd32_setregs(struct lwp *l, struct e netbsd32_adjust_limits(p); - l->l_md.md_flags |= MDL_COMPAT32; /* Force iret not sysret */ - pcb->pcb_flags = PCB_COMPAT32; - fpu_save_area_clear(l, pack->ep_osversion >= 699002600 ? __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__); @@ -143,13 +140,18 @@ netbsd32_setregs(struct lwp *l, struct e pcb->pcb_dbregs = NULL; } + kpreempt_disable(); + pcb->pcb_flags = PCB_COMPAT32; p->p_flag |= PK_32; + l->l_md.md_flags = MDL_COMPAT32; /* force iret not sysret */ + cpu_fsgs_zero(l); + cpu_fsgs_reload(l, LSEL(LUDATA32_SEL, SEL_UPL), + LSEL(LUDATA32_SEL, SEL_UPL)); + kpreempt_enable(); tf = l->l_md.md_regs; tf->tf_ds = LSEL(LUDATA32_SEL, SEL_UPL); tf->tf_es = LSEL(LUDATA32_SEL, SEL_UPL); - cpu_fsgs_zero(l); - cpu_fsgs_reload(l, tf->tf_ds, tf->tf_es); tf->tf_rdi = 0; tf->tf_rsi = 0; tf->tf_rbp = 0; Index: src/sys/compat/linux/arch/amd64/linux_machdep.c diff -u src/sys/compat/linux/arch/amd64/linux_machdep.c:1.51.6.1 src/sys/compat/linux/arch/amd64/linux_machdep.c:1.51.6.2 --- src/sys/compat/linux/arch/amd64/linux_machdep.c:1.51.6.1 Mon Sep 4 20:41:28 2017 +++ src/sys/compat/linux/arch/amd64/linux_machdep.c Fri Apr 5 07:48:05 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: linux_machdep.c,v 1.51.6.1 2017/09/04 20:41:28 snj Exp $ */ +/* $NetBSD: linux_machdep.c,v 1.51.6.2 2019/04/05 07:48:05 martin Exp $ */ /*- * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 
@@ -33,7 +33,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.51.6.1 2017/09/04 20:41:28 snj Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.51.6.2 2019/04/05 07:48:05 martin Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -89,11 +89,13 @@ linux_setregs(struct lwp *l, struct exec #endif fpu_save_area_clear(l, __NetBSD_NPXCW__); - pcb->pcb_flags = 0; + kpreempt_disable(); + pcb->pcb_flags = 0; l->l_proc->p_flag &= ~PK_32; - l->l_md.md_flags = MDL_IRET; + cpu_fsgs_zero(l); + kpreempt_enable(); tf = l->l_md.md_regs; tf->tf_rax = 0; @@ -118,7 +120,6 @@ linux_setregs(struct lwp *l, struct exec tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); tf->tf_es = 0; - cpu_fsgs_zero(l); return; } Index: src/sys/compat/linux32/arch/amd64/linux32_machdep.c diff -u src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.38.6.1 src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.38.6.2 --- src/sys/compat/linux32/arch/amd64/linux32_machdep.c:1.38.6.1 Sat Sep 9 17:01:23 2017 +++ src/sys/compat/linux32/arch/amd64/linux32_machdep.c Fri Apr 5 07:48:05 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: linux32_machdep.c,v 1.38.6.1 2017/09/09 17:01:23 snj Exp $ */ +/* $NetBSD: linux32_machdep.c,v 1.38.6.2 2019/04/05 07:48:05 martin Exp $ */ /*- * Copyright (c) 2006 Emmanuel Dreyfus, all rights reserved. @@ -31,7 +31,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.38.6.1 2017/09/09 17:01:23 snj Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux32_machdep.c,v 1.38.6.2 2019/04/05 07:48:05 martin Exp $"); #include <sys/param.h> #include <sys/proc.h> @@ -283,10 +283,14 @@ linux32_setregs(struct lwp *l, struct ex fpu_save_area_clear(l, __Linux_NPXCW__); - l->l_md.md_flags |= MDL_COMPAT32; /* Forces iret not sysret */ + kpreempt_disable(); pcb->pcb_flags = PCB_COMPAT32; - p->p_flag |= PK_32; + l->l_md.md_flags = MDL_COMPAT32; /* force iret not sysret */ + cpu_fsgs_zero(l); + cpu_fsgs_reload(l, GSEL(GUDATA32_SEL, SEL_UPL), + GSEL(GUDATA32_SEL, SEL_UPL)); + kpreempt_enable(); tf = l->l_md.md_regs; tf->tf_rax = 0; @@ -311,8 +315,6 @@ linux32_setregs(struct lwp *l, struct ex tf->tf_ss = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_ds = GSEL(GUDATA32_SEL, SEL_UPL); tf->tf_es = GSEL(GUDATA32_SEL, SEL_UPL); - cpu_fsgs_zero(l); - cpu_fsgs_reload(l, GSEL(GUDATA32_SEL, SEL_UPL), GSEL(GUDATA32_SEL, SEL_UPL)); } static void