The diff below is the first step in a cleanup of the amd64 low-level
segment bits. This step is to switch user-space to using code and data
segments in the global descriptor table (GDT) instead of the local
descriptor table (LDT) and to eliminate the GDT slots that we don't
actually use.
After this diff, all that remains in the LDT is unused segments and gates
for COMPAT_* stuff that was removed months ago, so the next diff will be
to remove all that LDT crap. After that come changes to eliminate the
unnecessary TSS-per-process stuff, which will eliminate the current
design's limit of 4k processes.
This diff shouldn't have any visible effect. Confirmation from people
running both AMD and Intel parts would be good.
Philip Guenther
Index: include/segments.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/segments.h,v
retrieving revision 1.5
diff -u -p -r1.5 segments.h
--- include/segments.h 23 May 2008 15:39:43 -0000 1.5
+++ include/segments.h 9 Oct 2010 19:48:01 -0000
@@ -253,24 +253,17 @@ void cpu_init_idt(void);
*
* Then come the predefined LDT (and possibly TSS) descriptors.
* There are NGDT_SYS of them.
+ *
+ * The particular order of the UCODE32, UDATA, and UCODE descriptors is
+ * required by the syscall/sysret instructions.
*/
#define GNULL_SEL 0 /* Null descriptor */
#define GCODE_SEL 1 /* Kernel code descriptor */
#define GDATA_SEL 2 /* Kernel data descriptor */
-#define GUCODE_SEL 3 /* User code descriptor */
+#define GUCODE32_SEL 3 /* User 32bit code descriptor (unused) */
#define GUDATA_SEL 4 /* User data descriptor */
-#define GAPM32CODE_SEL 5
-#define GAPM16CODE_SEL 6
-#define GAPMDATA_SEL 7
-#define GBIOSCODE_SEL 8
-#define GBIOSDATA_SEL 9
-#define GPNPBIOSCODE_SEL 10
-#define GPNPBIOSDATA_SEL 11
-#define GPNPBIOSSCRATCH_SEL 12
-#define GPNPBIOSTRAMP_SEL 13
-#define GUCODE32_SEL 14
-#define GUDATA32_SEL 15
-#define NGDT_MEM 16
+#define GUCODE_SEL 5 /* User code descriptor */
+#define NGDT_MEM 6
#define GLDT_SEL 0 /* Default LDT descriptor */
#define NGDT_SYS 1
@@ -296,7 +289,6 @@ void cpu_init_idt(void);
#define LDT_SIZE 144
-#define LSYSRETBASE_SEL LUCODE32_SEL
/*
* Checks for valid user selectors.
Index: amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.35
diff -u -p -r1.35 cpu.c
--- amd64/cpu.c 25 Jul 2010 21:43:38 -0000 1.35
+++ amd64/cpu.c 9 Oct 2010 19:48:06 -0000
@@ -654,7 +654,7 @@ cpu_init_msrs(struct cpu_info *ci)
{
wrmsr(MSR_STAR,
((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
- ((uint64_t)LSEL(LSYSRETBASE_SEL, SEL_UPL) << 48));
+ ((uint64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48));
wrmsr(MSR_LSTAR, (uint64_t)Xsyscall);
wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C);
Index: amd64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.40
diff -u -p -r1.40 locore.S
--- amd64/locore.S 28 Sep 2010 03:53:14 -0000 1.40
+++ amd64/locore.S 9 Oct 2010 19:48:16 -0000
@@ -911,7 +911,7 @@ IDTVEC(syscall)
* ss:rsp, etc, so that all GP registers can be
* saved. Then, fill in the rest.
*/
- pushq $(LSEL(LUDATA_SEL, SEL_UPL))
+ pushq $(GSEL(GUDATA_SEL, SEL_UPL))
pushq %r15
subq $(TF_RSP-TF_TRAPNO),%rsp
movq CPUVAR(SCRATCH),%r15
@@ -920,9 +920,9 @@ IDTVEC(syscall)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
- movw $(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
+ movw $(GSEL(GUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
movq %r11, TF_RFLAGS(%rsp) /* old rflags from syscall insn */
- movq $(LSEL(LUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
+ movq $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
movq %rcx,TF_RIP(%rsp)
movq $2,TF_ERR(%rsp)
movq $T_ASTFLT, TF_TRAPNO(%rsp)
@@ -961,7 +961,7 @@ syscall_return:
movw TF_FS(%rsp),%fs
movw TF_GS(%rsp),%gs
INTR_RESTORE_GPRS
- movw $(LSEL(LUDATA_SEL, SEL_UPL)),%r11
+ movw $(GSEL(GUDATA_SEL, SEL_UPL)),%r11
movw %r11,%ds
addq $48,%rsp
popq %rcx /* return rip */
Index: amd64/machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.124
diff -u -p -r1.124 machdep.c
--- amd64/machdep.c 2 Oct 2010 23:31:33 -0000 1.124
+++ amd64/machdep.c 9 Oct 2010 19:48:20 -0000
@@ -612,10 +612,10 @@ sendsig(sig_t catcher, int sig, int mask
/*
* Build context to run handler in.
*/
- tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_fs = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL);
+ tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
+ tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
+ tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
+ tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
tf->tf_rax = (u_int64_t)catcher;
tf->tf_rdi = sig;
@@ -623,10 +623,10 @@ sendsig(sig_t catcher, int sig, int mask
tf->tf_rdx = scp;
tf->tf_rip = (u_int64_t)p->p_sigcode;
- tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
+ tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
tf->tf_rflags &= ~(PSL_T|PSL_D|PSL_VM|PSL_AC);
tf->tf_rsp = scp;
- tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
+ tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
#ifdef DEBUG
if ((sigdebug & SDB_FOLLOW) && (!sigpid || p->p_pid == sigpid))
@@ -1018,10 +1018,10 @@ setregs(struct proc *p, struct exec_pack
pcb->pcb_flags = 0;
tf = p->p_md.md_regs;
- tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_fs = LSEL(LUDATA_SEL, SEL_UPL);
- tf->tf_gs = LSEL(LUDATA_SEL, SEL_UPL);
+ tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
+ tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
+ tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
+ tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
tf->tf_rdi = 0;
tf->tf_rsi = 0;
tf->tf_rbp = 0;
@@ -1038,10 +1038,10 @@ setregs(struct proc *p, struct exec_pack
tf->tf_r14 = 0;
tf->tf_r15 = 0;
tf->tf_rip = pack->ep_entry;
- tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
+ tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
tf->tf_rflags = PSL_USERSET;
tf->tf_rsp = stack;
- tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
+ tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
retval[1] = 0;
}
@@ -1498,12 +1498,15 @@ init_x86_64(paddr_t first_avail)
set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore, LDT_SIZE - 1,
SDT_SYSLDT, SEL_KPL, 0);
- set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0,
- atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1);
+ set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0,
+ atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0);
set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA_SEL), 0,
atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1);
+ set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0,
+ atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1);
+
/* make ldt gates and memory segments */
setgate((struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
&IDTVEC(oosyscall), 0, SDT_SYS386CGT, SEL_UPL,
@@ -1513,16 +1516,6 @@ init_x86_64(paddr_t first_avail)
*GDT_ADDR_MEM(gdtstore, GUCODE_SEL);
*(struct mem_segment_descriptor *)(ldtstore + LUDATA_SEL) =
*GDT_ADDR_MEM(gdtstore, GUDATA_SEL);
-
- /*
- * 32 bit GDT entries.
- */
-
- set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0,
- atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0);
-
- set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA32_SEL), 0,
- atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0);
/*
* 32 bit LDT entries.