Module Name: src Committed By: maxv Date: Thu Oct 19 18:36:31 UTC 2017
Modified Files: src/sys/arch/amd64/acpi: acpi_wakeup_low.S src/sys/arch/amd64/amd64: locore.S machdep.c Log Message: Improve our segregs model. Pass 1/3. Right now, we are saving and restoring %ds/%es each time we enter/leave the kernel. However, we let %fs/%gs live in the kernel space, and we rely on the fact that when switching to an LWP, %fs/%gs are set right away (via cpu_switchto or setregs). This has two drawbacks: we are taking care of %ds/%es while they are deprecated (useless) on 64bit LWPs, and we are restricting %fs/%gs while they still have a meaning on 32bit LWPs. Therefore, handle 32bit and 64bit LWPs differently: * 64bit LWPs use fixed segregs, which are not taken care of. * 32bit LWPs have dynamic segregs, always saved/restored. For now, only %ds and %es are changed; %fs and %gs will be handled in the next passes. The trapframe is constructed as usual. In INTRFASTEXIT, we restore %ds/%es depending on the %cs value. If %cs contains one of the two standard 64bit selectors, don't do anything. Otherwise, restore everything. When doing a context switch, just restore %ds/%es to their default values. On a 32bit LWP they will be overwritten by INTRFASTEXIT; on a 64bit LWP they won't be updated. In the ACPI wakeup code, restore %ds/%es to the default 64bit user value. To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/sys/arch/amd64/acpi/acpi_wakeup_low.S cvs rdiff -u -r1.133 -r1.134 src/sys/arch/amd64/amd64/locore.S cvs rdiff -u -r1.269 -r1.270 src/sys/arch/amd64/amd64/machdep.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/amd64/acpi/acpi_wakeup_low.S diff -u src/sys/arch/amd64/acpi/acpi_wakeup_low.S:1.6 src/sys/arch/amd64/acpi/acpi_wakeup_low.S:1.7 --- src/sys/arch/amd64/acpi/acpi_wakeup_low.S:1.6 Sat Sep 23 10:18:49 2017 +++ src/sys/arch/amd64/acpi/acpi_wakeup_low.S Thu Oct 19 18:36:31 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: acpi_wakeup_low.S,v 1.6 2017/09/23 10:18:49 maxv Exp $ */ +/* $NetBSD: acpi_wakeup_low.S,v 1.7 2017/10/19 18:36:31 maxv Exp $ */ /*- * Copyright (c) 2007 Joerg Sonnenberger <jo...@netbsd.org> @@ -40,12 +40,17 @@ acpi_md_sleep_exit: lgdt ACPI_SUSPEND_GDT(%r8) /* Reload fixed descriptors for new GDT */ - movw $GSEL(GDATA_SEL, SEL_KPL),%ax + movw $GSEL(GUDATA_SEL, SEL_UPL),%ax movw %ax,%ds movw %ax,%es + movw $GSEL(GDATA_SEL, SEL_KPL),%ax movw %ax,%ss - /* FS and GS are driven by MSRs, so use NULL for them */ + /* + * FS and GS are driven by MSRs, so use NULL for them. + * XXX XXX XXX That's not the case if we're returning to a 32bit + * LWP! + */ xorw %ax,%ax movw %ax,%fs movw %ax,%gs Index: src/sys/arch/amd64/amd64/locore.S diff -u src/sys/arch/amd64/amd64/locore.S:1.133 src/sys/arch/amd64/amd64/locore.S:1.134 --- src/sys/arch/amd64/amd64/locore.S:1.133 Tue Oct 17 07:48:10 2017 +++ src/sys/arch/amd64/amd64/locore.S Thu Oct 19 18:36:31 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.133 2017/10/17 07:48:10 maxv Exp $ */ +/* $NetBSD: locore.S,v 1.134 2017/10/19 18:36:31 maxv Exp $ */ /* * Copyright-o-rama! @@ -1159,7 +1159,10 @@ skip_CR0: jnz lwp_32bit lwp_64bit: - /* Zero out %fs/%gs registers. */ + /* Set default 64bit values in %ds, %es, %fs and %gs. */ + movq $GSEL(GUDATA_SEL, SEL_UPL),%rax + movw %ax,%ds + movw %ax,%es xorq %rax,%rax movw %ax,%fs CLI(cx) @@ -1193,8 +1196,11 @@ lwp_32bit: movq PCB_GS(%r14),%rax movq %rax,(GUGS_SEL*8)(%rcx) - /* Reload %fs and %gs */ + /* Set default 32bit values in %ds, %es. %fs and %gs are special. 
*/ movq L_MD_REGS(%r12),%rbx + movq $GSEL(GUDATA32_SEL, SEL_UPL),%rax + movw %ax,%ds + movw %ax,%es movw TF_FS(%rbx),%fs CLI(ax) SWAPGS @@ -1281,10 +1287,10 @@ IDTVEC(syscall) cld #endif INTR_SAVE_GPRS - movw %es,TF_ES(%rsp) - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) - movw $(GSEL(GUDATA_SEL, SEL_UPL)),TF_DS(%rsp) + movw $GSEL(GUDATA_SEL, SEL_UPL),TF_DS(%rsp) + movw $GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp) + movw $0,TF_FS(%rsp) + movw $0,TF_GS(%rsp) STI(si) do_syscall: @@ -1313,18 +1319,18 @@ do_syscall: #endif /* - * If the syscall might have modified some registers, or we are a 32bit - * process we must return to user with an 'iret' instruction. - * If the iret faults in kernel (assumed due to illegal register values) - * then a SIGSEGV will be signalled. + * Decide if we need to take a slow path. That's the case when we + * want to reload %cs and %ss on a 64bit LWP (MDL_IRET set), or when + * we're returning to a 32bit LWP (MDL_COMPAT32 set). + * + * In either case, we jump into intrfastexit and return to userland + * with the iret instruction. */ testl $(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14) + jnz intrfastexit + INTR_RESTORE_GPRS - movw TF_ES(%rsp),%es - movw TF_DS(%rsp),%ds SWAPGS - jnz .Lkexit - #ifndef XEN movq TF_RIP(%rsp),%rcx /* %rip for sysret */ movq TF_RFLAGS(%rsp),%r11 /* %flags for sysret */ @@ -1469,12 +1475,21 @@ ENTRY(intrfastexit) INTR_RESTORE_GPRS testq $SEL_UPL,TF_CS(%rsp) /* interrupted %cs */ jz .Lkexit + cmpq $LSEL(LUCODE_SEL, SEL_UPL),TF_CS(%rsp) + je .Luexit64 + cmpq $GSEL(GUCODE_SEL, SEL_UPL),TF_CS(%rsp) + je .Luexit64 - /* Disable interrupts until the 'iret', user registers loaded. 
*/ +.Luexit32: NOT_XEN(cli;) movw TF_ES(%rsp),%es movw TF_DS(%rsp),%ds SWAPGS + jmp .Lkexit + +.Luexit64: + NOT_XEN(cli;) + SWAPGS .Lkexit: addq $TF_REGSIZE+16,%rsp /* + T_xxx and error code */ Index: src/sys/arch/amd64/amd64/machdep.c diff -u src/sys/arch/amd64/amd64/machdep.c:1.269 src/sys/arch/amd64/amd64/machdep.c:1.270 --- src/sys/arch/amd64/amd64/machdep.c:1.269 Thu Oct 19 10:01:09 2017 +++ src/sys/arch/amd64/amd64/machdep.c Thu Oct 19 18:36:31 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: machdep.c,v 1.269 2017/10/19 10:01:09 maxv Exp $ */ +/* $NetBSD: machdep.c,v 1.270 2017/10/19 18:36:31 maxv Exp $ */ /* * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011 @@ -110,7 +110,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.269 2017/10/19 10:01:09 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.270 2017/10/19 18:36:31 maxv Exp $"); /* #define XENDEBUG_LOW */ @@ -447,15 +447,19 @@ x86_64_tls_switch(struct lwp *l) HYPERVISOR_fpu_taskswitch(1); } - /* Update TLS segment pointers */ + /* Update segment registers */ if (pcb->pcb_flags & PCB_COMPAT32) { update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); + setds(GSEL(GUDATA32_SEL, SEL_UPL)); + setes(GSEL(GUDATA32_SEL, SEL_UPL)); setfs(tf->tf_fs); HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs); } else { update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero); update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero); + setds(GSEL(GUDATA_SEL, SEL_UPL)); + setes(GSEL(GUDATA_SEL, SEL_UPL)); setfs(0); HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs); @@ -2063,6 +2067,8 @@ cpu_segregs64_zero(struct lwp *l) kpreempt_disable(); tf->tf_fs = 0; tf->tf_gs = 0; + setds(GSEL(GUDATA_SEL, SEL_UPL)); + setes(GSEL(GUDATA_SEL, SEL_UPL)); setfs(0); setusergs(0); @@ -2100,6 +2106,8 @@ cpu_segregs32_zero(struct lwp *l) kpreempt_disable(); tf->tf_fs = 0; tf->tf_gs = 0; + 
setds(GSEL(GUDATA32_SEL, SEL_UPL)); + setes(GSEL(GUDATA32_SEL, SEL_UPL)); setfs(0); setusergs(0); pcb->pcb_fs = 0;