Module Name:    src
Committed By:   maxv
Date:           Thu Oct 19 18:36:31 UTC 2017

Modified Files:
        src/sys/arch/amd64/acpi: acpi_wakeup_low.S
        src/sys/arch/amd64/amd64: locore.S machdep.c

Log Message:
Improve our segregs model. Pass 1/3.

Right now, we are saving and restoring %ds/%es each time we enter/leave the
kernel. However, we let %fs/%gs live in the kernel space, and we rely on
the fact that when switching to an LWP, %fs/%gs are set right away (via
cpu_switchto or setregs).

This approach has two drawbacks: we are taking care of %ds/%es even though
they are deprecated (useless) on 64bit LWPs, and we are restricting %fs/%gs
while they still have a meaning on 32bit LWPs.

Therefore, handle 32bit and 64bit LWPs differently:
 * 64bit LWPs use fixed segregs, which are not taken care of.
 * 32bit LWPs have dynamic segregs, always saved/restored.

For now, only %ds and %es are changed; %fs and %gs will be in the next
passes.

The trapframe is constructed as usual. In INTRFASTEXIT, we restore %ds/%es
depending on the %cs value. If %cs contains one of the two standard 64bit
selectors, don't do anything. Otherwise, restore everything.

When doing a context switch, just restore %ds/%es to their default values.
On a 32bit LWP they will be overwritten by INTRFASTEXIT; on a 64bit LWP
they won't be updated.

In the ACPI wakeup code, restore %ds/%es to the default 64bit user value.


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/sys/arch/amd64/acpi/acpi_wakeup_low.S
cvs rdiff -u -r1.133 -r1.134 src/sys/arch/amd64/amd64/locore.S
cvs rdiff -u -r1.269 -r1.270 src/sys/arch/amd64/amd64/machdep.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/amd64/acpi/acpi_wakeup_low.S
diff -u src/sys/arch/amd64/acpi/acpi_wakeup_low.S:1.6 src/sys/arch/amd64/acpi/acpi_wakeup_low.S:1.7
--- src/sys/arch/amd64/acpi/acpi_wakeup_low.S:1.6	Sat Sep 23 10:18:49 2017
+++ src/sys/arch/amd64/acpi/acpi_wakeup_low.S	Thu Oct 19 18:36:31 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: acpi_wakeup_low.S,v 1.6 2017/09/23 10:18:49 maxv Exp $	*/
+/*	$NetBSD: acpi_wakeup_low.S,v 1.7 2017/10/19 18:36:31 maxv Exp $	*/
 
 /*-
 * Copyright (c) 2007 Joerg Sonnenberger <joerg@netbsd.org>
@@ -40,12 +40,17 @@ acpi_md_sleep_exit:
 	lgdt	ACPI_SUSPEND_GDT(%r8)
 
 	/* Reload fixed descriptors for new GDT */
-	movw	$GSEL(GDATA_SEL, SEL_KPL),%ax
+	movw	$GSEL(GUDATA_SEL, SEL_UPL),%ax
 	movw	%ax,%ds
 	movw	%ax,%es
+	movw	$GSEL(GDATA_SEL, SEL_KPL),%ax
 	movw	%ax,%ss
 
-	/* FS and GS are driven by MSRs, so use NULL for them */
+	/*
+	 * FS and GS are driven by MSRs, so use NULL for them.
+	 * XXX XXX XXX That's not the case if we're returning to a 32bit
+	 * LWP!
+	 */
 	xorw	%ax,%ax
 	movw	%ax,%fs
 	movw	%ax,%gs

Index: src/sys/arch/amd64/amd64/locore.S
diff -u src/sys/arch/amd64/amd64/locore.S:1.133 src/sys/arch/amd64/amd64/locore.S:1.134
--- src/sys/arch/amd64/amd64/locore.S:1.133	Tue Oct 17 07:48:10 2017
+++ src/sys/arch/amd64/amd64/locore.S	Thu Oct 19 18:36:31 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: locore.S,v 1.133 2017/10/17 07:48:10 maxv Exp $	*/
+/*	$NetBSD: locore.S,v 1.134 2017/10/19 18:36:31 maxv Exp $	*/
 
 /*
  * Copyright-o-rama!
@@ -1159,7 +1159,10 @@ skip_CR0:
 	jnz	lwp_32bit
 
 lwp_64bit:
-	/* Zero out %fs/%gs registers. */
+	/* Set default 64bit values in %ds, %es, %fs and %gs. */
+	movq	$GSEL(GUDATA_SEL, SEL_UPL),%rax
+	movw	%ax,%ds
+	movw	%ax,%es
 	xorq	%rax,%rax
 	movw	%ax,%fs
 	CLI(cx)
@@ -1193,8 +1196,11 @@ lwp_32bit:
 	movq	PCB_GS(%r14),%rax
 	movq	%rax,(GUGS_SEL*8)(%rcx)
 
-	/* Reload %fs and %gs */
+	/* Set default 32bit values in %ds, %es. %fs and %gs are special. */
 	movq	L_MD_REGS(%r12),%rbx
+	movq	$GSEL(GUDATA32_SEL, SEL_UPL),%rax
+	movw	%ax,%ds
+	movw	%ax,%es
 	movw	TF_FS(%rbx),%fs
 	CLI(ax)
 	SWAPGS
@@ -1281,10 +1287,10 @@ IDTVEC(syscall)
 	cld
 #endif
 	INTR_SAVE_GPRS
-	movw	%es,TF_ES(%rsp)
-	movw	%fs,TF_FS(%rsp)
-	movw	%gs,TF_GS(%rsp)
-	movw	$(GSEL(GUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
+	movw	$GSEL(GUDATA_SEL, SEL_UPL),TF_DS(%rsp)
+	movw	$GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp)
+	movw	$0,TF_FS(%rsp)
+	movw	$0,TF_GS(%rsp)
 	STI(si)
 
 do_syscall:
@@ -1313,18 +1319,18 @@ do_syscall:
 #endif
 
 	/*
-	 * If the syscall might have modified some registers, or we are a 32bit
-	 * process we must return to user with an 'iret' instruction.
-	 * If the iret faults in kernel (assumed due to illegal register values)
-	 * then a SIGSEGV will be signalled.
+	 * Decide if we need to take a slow path. That's the case when we
+	 * want to reload %cs and %ss on a 64bit LWP (MDL_IRET set), or when
+	 * we're returning to a 32bit LWP (MDL_COMPAT32 set).
+	 *
+	 * In either case, we jump into intrfastexit and return to userland
+	 * with the iret instruction.
 	 */
 	testl	$(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14)
+	jnz	intrfastexit
+
 	INTR_RESTORE_GPRS
-	movw	TF_ES(%rsp),%es
-	movw	TF_DS(%rsp),%ds
 	SWAPGS
-	jnz	.Lkexit
-
 #ifndef XEN
 	movq	TF_RIP(%rsp),%rcx	/* %rip for sysret */
 	movq	TF_RFLAGS(%rsp),%r11	/* %flags for sysret */
@@ -1469,12 +1475,21 @@ ENTRY(intrfastexit)
 	INTR_RESTORE_GPRS
 	testq	$SEL_UPL,TF_CS(%rsp)	/* interrupted %cs */
 	jz	.Lkexit
+	cmpq	$LSEL(LUCODE_SEL, SEL_UPL),TF_CS(%rsp)
+	je	.Luexit64
+	cmpq	$GSEL(GUCODE_SEL, SEL_UPL),TF_CS(%rsp)
+	je	.Luexit64
 
-	/* Disable interrupts until the 'iret', user registers loaded. */
+.Luexit32:
 	NOT_XEN(cli;)
 	movw	TF_ES(%rsp),%es
 	movw	TF_DS(%rsp),%ds
 	SWAPGS
+	jmp	.Lkexit
+
+.Luexit64:
+	NOT_XEN(cli;)
+	SWAPGS
 
 .Lkexit:
 	addq	$TF_REGSIZE+16,%rsp	/* + T_xxx and error code */

Index: src/sys/arch/amd64/amd64/machdep.c
diff -u src/sys/arch/amd64/amd64/machdep.c:1.269 src/sys/arch/amd64/amd64/machdep.c:1.270
--- src/sys/arch/amd64/amd64/machdep.c:1.269	Thu Oct 19 10:01:09 2017
+++ src/sys/arch/amd64/amd64/machdep.c	Thu Oct 19 18:36:31 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: machdep.c,v 1.269 2017/10/19 10:01:09 maxv Exp $	*/
+/*	$NetBSD: machdep.c,v 1.270 2017/10/19 18:36:31 maxv Exp $	*/
 
 /*
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -110,7 +110,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.269 2017/10/19 10:01:09 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.270 2017/10/19 18:36:31 maxv Exp $");
 
 /* #define XENDEBUG_LOW  */
 
@@ -447,15 +447,19 @@ x86_64_tls_switch(struct lwp *l)
 		HYPERVISOR_fpu_taskswitch(1);
 	}
 
-	/* Update TLS segment pointers */
+	/* Update segment registers */
 	if (pcb->pcb_flags & PCB_COMPAT32) {
 		update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs);
 		update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs);
+		setds(GSEL(GUDATA32_SEL, SEL_UPL));
+		setes(GSEL(GUDATA32_SEL, SEL_UPL));
 		setfs(tf->tf_fs);
 		HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs);
 	} else {
 		update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero);
 		update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero);
+		setds(GSEL(GUDATA_SEL, SEL_UPL));
+		setes(GSEL(GUDATA_SEL, SEL_UPL));
 		setfs(0);
 		HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0);
 		HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs);
@@ -2063,6 +2067,8 @@ cpu_segregs64_zero(struct lwp *l)
 	kpreempt_disable();
 	tf->tf_fs = 0;
 	tf->tf_gs = 0;
+	setds(GSEL(GUDATA_SEL, SEL_UPL));
+	setes(GSEL(GUDATA_SEL, SEL_UPL));
 	setfs(0);
 	setusergs(0);
 
@@ -2100,6 +2106,8 @@ cpu_segregs32_zero(struct lwp *l)
 	kpreempt_disable();
 	tf->tf_fs = 0;
 	tf->tf_gs = 0;
+	setds(GSEL(GUDATA32_SEL, SEL_UPL));
+	setes(GSEL(GUDATA32_SEL, SEL_UPL));
 	setfs(0);
 	setusergs(0);
 	pcb->pcb_fs = 0;

Reply via email to