Thank you again to the people who tested the first diff.  Here's diff two
in the cleanup of the amd64 low-level segment bits.  Now that the LDT is
only used by dead compat code, this diff loads the LDT register with the
null selector (disabling use of it), stops reloading it on every context
switch, and blows away the table itself, as well as the pcb and pmap bits
that were used to track it.

While we're here, delete two other unused pcb members: pcb_usersp and
pcb_flags.  (Deleting pcb_usersp along with pcb_ldt_sel removes two 8-byte
members ahead of pcb_savefpu, which keeps that member aligned properly.)
Finally, delete the defines for the unimplemented AMD64_{GET,SET}_LDT
sysarch() calls.


This diff shaves a few instructions (including a moderately expensive lldt)
from the process switching path, but should otherwise have no visible
effect**.  Testing on both AMD and Intel parts would be good.

After this diff comes the interesting one, switching from TSS-per-process 
to TSS-per-CPU, which will eliminate GDT changes/growth as well as the 
limit of ~4k processes and shave still more off the process switching 
path.


Philip Guenther

** If someone hardcoded the old LDT selector numbers into the ASM in
their code, their programs will break, but that code would be both
idiotic and pointless.


Index: include/pcb.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/pcb.h,v
retrieving revision 1.5
diff -u -p -r1.5 pcb.h
--- include/pcb.h       26 Jun 2008 05:42:09 -0000      1.5
+++ include/pcb.h       14 Oct 2010 06:36:41 -0000
@@ -97,11 +97,8 @@ struct pcb {
        u_int64_t pcb_cr3;
        u_int64_t pcb_rsp;
        u_int64_t pcb_rbp;
-       u_int64_t pcb_usersp;
-       u_int64_t pcb_ldt_sel;
        struct  savefpu pcb_savefpu;    /* floating point state */
        int     pcb_cr0;                /* saved image of CR0 */
-       int     pcb_flags;
        caddr_t pcb_onfault;            /* copyin/out fault recovery */
        struct cpu_info *pcb_fpcpu;     /* cpu holding our fp state. */
        unsigned pcb_iomap[NIOPORTS/32];        /* I/O bitmap */
Index: include/pmap.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/pmap.h,v
retrieving revision 1.34
diff -u -p -r1.34 pmap.h
--- include/pmap.h      6 Sep 2010 17:36:49 -0000       1.34
+++ include/pmap.h      14 Oct 2010 06:36:42 -0000
@@ -318,9 +318,6 @@ struct pmap {
                                        /* pointer to a PTP in our pmap */
        struct pmap_statistics pm_stats;  /* pmap stats (lck by object lock) */
 
-       union descriptor *pm_ldt;       /* user-set LDT */
-       int pm_ldt_len;                 /* number of LDT entries */
-       int pm_ldt_sel;                 /* LDT selector */
        u_int32_t pm_cpus;              /* mask of CPUs using pmap */
 };
 
Index: include/segments.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/segments.h,v
retrieving revision 1.6
diff -u -p -r1.6 segments.h
--- include/segments.h  14 Oct 2010 04:38:24 -0000      1.6
+++ include/segments.h  14 Oct 2010 06:36:42 -0000
@@ -61,7 +61,7 @@
 #define        ISLDT(s)        ((s) & SEL_LDT) /* is it local or global */
 #define        SEL_LDT         4               /* local descriptor table */    
 
-/* Dynamically allocated TSSs and LDTs start (byte offset) */
+/* Dynamically allocated TSSs start (byte offset) */
 #define SYSSEL_START   (NGDT_MEM << 3)
 #define DYNSEL_START   (SYSSEL_START + (NGDT_SYS << 4))
 
@@ -69,9 +69,8 @@
  * These define the index not from the start of the GDT, but from
  * the part of the GDT that they're allocated from.
  * First NGDT_MEM entries are 8-byte descriptors for CS and DS.
- * Next NGDT_SYS entries are 16-byte descriptors defining LDTs.
  *
- * The rest is 16-byte descriptors for TSS and LDT.
+ * The rest is 16-byte descriptors for TSSs
  */
 
 #define        IDXSEL(s)       (((s) >> 3) & 0x1fff)
@@ -158,7 +157,6 @@ extern struct sys_segment_descriptor *ld
 #endif
 extern struct gate_descriptor *idt;
 extern char *gdtstore;
-extern char *ldtstore;
 
 void setgate(struct gate_descriptor *, void *, int, int, int, int);
 void unsetgate(struct gate_descriptor *);
@@ -265,8 +263,7 @@ void cpu_init_idt(void);
 #define        GUCODE_SEL      5       /* User code descriptor */
 #define NGDT_MEM 6
 
-#define        GLDT_SEL        0       /* Default LDT descriptor */
-#define NGDT_SYS       1
+#define NGDT_SYS       0
 
 #define GDT_SYS_OFFSET (NGDT_MEM << 3)
 
@@ -276,31 +273,11 @@ void cpu_init_idt(void);
    ((struct sys_segment_descriptor *)((s) + (((i) << 4) + SYSSEL_START)))
 
 /*
- * Byte offsets in the Local Descriptor Table (LDT)
- * Strange order because of syscall/sysret insns
- */
-#define        LSYS5CALLS_SEL  0       /* iBCS system call gate */
-#define LUCODE32_SEL   8       /* 32 bit user code descriptor */
-#define        LUDATA_SEL      16      /* User data descriptor */
-#define        LUCODE_SEL      24      /* User code descriptor */
-#define        LSOL26CALLS_SEL 32      /* Solaris 2.6 system call gate */
-#define LUDATA32_SEL   56      /* 32 bit user data descriptor (needed?)*/
-#define        LBSDICALLS_SEL  128     /* BSDI system call gate */
-
-#define LDT_SIZE       144
-
-
-/*
  * Checks for valid user selectors.
  */
-#define VALID_USER_DSEL32(s) \
-    ((s) == GSEL(GUDATA32_SEL, SEL_UPL) || (s) == LSEL(LUDATA32_SEL, SEL_UPL))
-#define VALID_USER_CSEL32(s) \
-    ((s) == GSEL(GUCODE32_SEL, SEL_UPL) || (s) == LSEL(LUCODE32_SEL, SEL_UPL))
-
 #define VALID_USER_CSEL(s) \
-    ((s) == GSEL(GUCODE_SEL, SEL_UPL) || (s) == LSEL(LUCODE_SEL, SEL_UPL))
+    ((s) == GSEL(GUCODE_SEL, SEL_UPL))
 #define VALID_USER_DSEL(s) \
-    ((s) == GSEL(GUDATA_SEL, SEL_UPL) || (s) == LSEL(LUDATA_SEL, SEL_UPL))
+    ((s) == GSEL(GUDATA_SEL, SEL_UPL))
 
 #endif /* _AMD64_SEGMENTS_H_ */
Index: include/sysarch.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/sysarch.h,v
retrieving revision 1.4
diff -u -p -r1.4 sysarch.h
--- include/sysarch.h   18 Sep 2009 21:08:19 -0000      1.4
+++ include/sysarch.h   14 Oct 2010 06:36:42 -0000
@@ -7,8 +7,6 @@
 /*
  * Architecture specific syscalls (amd64)
  */
-#define AMD64_GET_LDT  0
-#define AMD64_SET_LDT  1
 #define        AMD64_IOPL      2
 #define        AMD64_GET_IOPERM        3
 #define        AMD64_SET_IOPERM        4
Index: amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.36
diff -u -p -r1.36 cpu.c
--- amd64/cpu.c 14 Oct 2010 04:38:24 -0000      1.36
+++ amd64/cpu.c 14 Oct 2010 06:36:42 -0000
@@ -512,7 +512,7 @@ cpu_hatch(void *v)
        gdt_init_cpu(ci);
        fpuinit(ci);
 
-       lldt(GSYSSEL(GLDT_SEL, SEL_KPL));
+       lldt(0);
 
        cpu_init(ci);
 
Index: amd64/gdt.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/gdt.c,v
retrieving revision 1.16
diff -u -p -r1.16 gdt.c
--- amd64/gdt.c 26 Jun 2010 23:24:43 -0000      1.16
+++ amd64/gdt.c 14 Oct 2010 06:36:43 -0000
@@ -124,8 +124,6 @@ gdt_init(void)
        }
        bcopy(old_gdt, gdtstore, DYNSEL_START);
        ci->ci_gdt = gdtstore;
-       set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore,
-           LDT_SIZE - 1, SDT_SYSLDT, SEL_KPL, 0);
 
        gdt_init_cpu(ci);
 }
Index: amd64/genassym.cf
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/genassym.cf,v
retrieving revision 1.23
diff -u -p -r1.23 genassym.cf
--- amd64/genassym.cf   24 Sep 2010 13:21:30 -0000      1.23
+++ amd64/genassym.cf   14 Oct 2010 06:36:43 -0000
@@ -84,10 +84,8 @@ struct pcb
 member pcb_cr3
 member pcb_rbp
 member pcb_rsp
-member pcb_usersp
 member PCB_RSP0        pcb_tss.tss_rsp0
 member pcb_cr0
-member pcb_ldt_sel
 member pcb_onfault
 member pcb_fpcpu
 
Index: amd64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.41
diff -u -p -r1.41 locore.S
--- amd64/locore.S      14 Oct 2010 04:38:24 -0000      1.41
+++ amd64/locore.S      14 Oct 2010 06:36:43 -0000
@@ -1008,26 +1008,12 @@ NENTRY(child_trampoline)
 
        .globl  _C_LABEL(osyscall_return)
 
-/* XXX - can we zap the following two? */
-
-/*
- * Old call gate entry for syscall. only needed if we're
- * going to support running old NetBSD or ibcs2 binaries, etc,
- * on NetBSD/amd64.
- */
-IDTVEC(oosyscall)
-       /* Set rflags in trap frame. */
-       pushfq
-       popq    8(%rsp)
-       pushq   $7              # size of instruction for restart
-       jmp     osyscall1
 
 /*
  * Trap gate entry for int $80 syscall, also used by sigreturn.
  */
 IDTVEC(osyscall)
        pushq   $2              # size of instruction for restart
-osyscall1:
        pushq   $T_ASTFLT       # trap # for doing ASTs
        INTRENTRY
        sti
Index: amd64/machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.125
diff -u -p -r1.125 machdep.c
--- amd64/machdep.c     14 Oct 2010 04:38:24 -0000      1.125
+++ amd64/machdep.c     14 Oct 2010 06:36:45 -0000
@@ -360,7 +360,7 @@ cpu_startup(void)
 }
 
 /*
- * Set up proc0's TSS and LDT.
+ * Set up proc0's TSS
  */
 void
 x86_64_proc0_tss_ldt_init(void)
@@ -372,14 +372,11 @@ x86_64_proc0_tss_ldt_init(void)
 
        cpu_info_primary.ci_curpcb = pcb = &proc0.p_addr->u_pcb;
 
-       pcb->pcb_flags = 0;
        pcb->pcb_tss.tss_iobase =
            (u_int16_t)((caddr_t)pcb->pcb_iomap - (caddr_t)&pcb->pcb_tss);
        for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
                pcb->pcb_iomap[x] = 0xffffffff;
 
-       pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel =
-           GSYSSEL(GLDT_SEL, SEL_KPL);
        pcb->pcb_cr0 = rcr0();
        pcb->pcb_tss.tss_rsp0 = (u_int64_t)proc0.p_addr + USPACE - 16;
        pcb->pcb_tss.tss_ist[0] = (u_int64_t)proc0.p_addr + PAGE_SIZE;
@@ -387,11 +384,11 @@ x86_64_proc0_tss_ldt_init(void)
        proc0.p_md.md_tss_sel = tss_alloc(pcb);
 
        ltr(proc0.p_md.md_tss_sel);
-       lldt(pcb->pcb_ldt_sel);
+       lldt(0);
 }
 
 /*       
- * Set up TSS and LDT for a new PCB.
+ * Set up TSS for a new PCB.
  */         
          
 #ifdef MULTIPROCESSOR
@@ -406,9 +403,6 @@ x86_64_init_pcb_tss_ldt(struct cpu_info 
        for (x = 0; x < sizeof(pcb->pcb_iomap) / 4; x++)
                pcb->pcb_iomap[x] = 0xffffffff;
 
-       /* XXXfvdl pmap_kernel not needed */ 
-       pcb->pcb_ldt_sel = pmap_kernel()->pm_ldt_sel =
-           GSYSSEL(GLDT_SEL, SEL_KPL);
        pcb->pcb_cr0 = rcr0();
         
         ci->ci_idle_tss_sel = tss_alloc(pcb);
@@ -1007,7 +1001,6 @@ void
 setregs(struct proc *p, struct exec_package *pack, u_long stack,
     register_t *retval)
 {
-       struct pcb *pcb = &p->p_addr->u_pcb;
        struct trapframe *tf;
 
        /* If we were using the FPU, forget about it. */
@@ -1015,8 +1008,6 @@ setregs(struct proc *p, struct exec_pack
                fpusave_proc(p, 0);
        p->p_md.md_flags &= ~MDP_USEDFPU;
 
-       pcb->pcb_flags = 0;
-
        tf = p->p_md.md_regs;
        tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
        tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
@@ -1053,7 +1044,6 @@ setregs(struct proc *p, struct exec_pack
 struct gate_descriptor *idt;
 char idt_allocmap[NIDT];
 struct simplelock idt_lock;
-char *ldtstore;
 char *gdtstore;
 extern  struct user *proc0paddr;
 
@@ -1194,10 +1184,7 @@ map_tramps(void) {
 
 #define        IDTVEC(name)    __CONCAT(X, name)
 typedef void (vector)(void);
-extern vector IDTVEC(syscall);
-extern vector IDTVEC(syscall32);
 extern vector IDTVEC(osyscall);
-extern vector IDTVEC(oosyscall);
 extern vector *IDTVEC(exceptions)[];
 
 int bigmem = 0;
@@ -1207,7 +1194,6 @@ init_x86_64(paddr_t first_avail)
 {
        extern void consinit(void);
        struct region_descriptor region;
-       struct mem_segment_descriptor *ldt_segp;
        bios_memmap_t *bmp;
        int x, ist;
 
@@ -1486,56 +1472,22 @@ init_x86_64(paddr_t first_avail)
 
        idt = (struct gate_descriptor *)idt_vaddr;
        gdtstore = (char *)(idt + NIDT);
-       ldtstore = gdtstore + DYNSEL_START;
 
        /* make gdt gates and memory segments */
-       set_mem_segment(GDT_ADDR_MEM(gdtstore, GCODE_SEL), 0, 0xfffff, SDT_MEMERA,
-           SEL_KPL, 1, 0, 1);
-
-       set_mem_segment(GDT_ADDR_MEM(gdtstore, GDATA_SEL), 0, 0xfffff, SDT_MEMRWA,
-           SEL_KPL, 1, 0, 1);
+       set_mem_segment(GDT_ADDR_MEM(gdtstore, GCODE_SEL), 0,
+           0xfffff, SDT_MEMERA, SEL_KPL, 1, 0, 1);
 
-       set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore, LDT_SIZE - 1,
-           SDT_SYSLDT, SEL_KPL, 0);
+       set_mem_segment(GDT_ADDR_MEM(gdtstore, GDATA_SEL), 0,
+           0xfffff, SDT_MEMRWA, SEL_KPL, 1, 0, 1);
 
        set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0,
-           atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0);
+           atop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0);
 
        set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA_SEL), 0,
            atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1);
 
        set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0,
            atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1);
-
-       /* make ldt gates and memory segments */
-       setgate((struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
-           &IDTVEC(oosyscall), 0, SDT_SYS386CGT, SEL_UPL,
-           GSEL(GCODE_SEL, SEL_KPL));
-
-       *(struct mem_segment_descriptor *)(ldtstore + LUCODE_SEL) =
-           *GDT_ADDR_MEM(gdtstore, GUCODE_SEL);
-       *(struct mem_segment_descriptor *)(ldtstore + LUDATA_SEL) =
-           *GDT_ADDR_MEM(gdtstore, GUDATA_SEL);
-
-       /*
-        * 32 bit LDT entries.
-        */
-       ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUCODE32_SEL);
-       set_mem_segment(ldt_segp, 0, atop(VM_MAXUSER_ADDRESS32) - 1,
-           SDT_MEMERA, SEL_UPL, 1, 1, 0);
-       ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUDATA32_SEL);
-       set_mem_segment(ldt_segp, 0, atop(VM_MAXUSER_ADDRESS32) - 1,
-           SDT_MEMRWA, SEL_UPL, 1, 1, 0);
-
-       /*
-        * Other entries.
-        */
-       memcpy((struct gate_descriptor *)(ldtstore + LSOL26CALLS_SEL),
-           (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
-           sizeof (struct gate_descriptor));
-       memcpy((struct gate_descriptor *)(ldtstore + LBSDICALLS_SEL),
-           (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
-           sizeof (struct gate_descriptor));
 
        /* exceptions */
        for (x = 0; x < 32; x++) {
Index: amd64/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/pmap.c,v
retrieving revision 1.56
diff -u -p -r1.56 pmap.c
--- amd64/pmap.c        6 Sep 2010 17:36:49 -0000       1.56
+++ amd64/pmap.c        14 Oct 2010 06:36:47 -0000
@@ -121,6 +121,7 @@
 #include <uvm/uvm.h>
 
 #include <machine/atomic.h>
+#include <machine/lock.h>
 #include <machine/cpu.h>
 #include <machine/specialreg.h>
 #include <machine/gdt.h>
@@ -1022,11 +1023,6 @@ pmap_create(void)
        pmap->pm_stats.resident_count = 1;      /* count the PDP allocd below */
        pmap->pm_cpus = 0;
 
-       /* init the LDT */
-       pmap->pm_ldt = NULL;
-       pmap->pm_ldt_len = 0;
-       pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
-
        /* allocate PDP */
 
        /*
@@ -1120,7 +1116,7 @@ pmap_reference(struct pmap *pmap)
 }
 
 /*
- * pmap_activate: activate a process' pmap (fill in %cr3 and LDT info)
+ * pmap_activate: activate a process' pmap (fill in %cr3)
  *
  * => called from cpu_switch()
  * => if p is the curproc, then load it into the MMU
@@ -1133,7 +1129,6 @@ pmap_activate(struct proc *p)
        struct pmap *pmap = p->p_vmspace->vm_map.pmap;
 
        pcb->pcb_pmap = pmap;
-       pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
        pcb->pcb_cr3 = pmap->pm_pdirpa;
        if (p == curproc) {
                lcr3(pcb->pcb_cr3);
@@ -1143,8 +1138,6 @@ pmap_activate(struct proc *p)
                 */
                x86_atomic_setbits_ul(&pmap->pm_cpus, (1U << cpu_number()));
        }
-       if (pcb == curpcb)
-               lldt(pcb->pcb_ldt_sel);
 }
 
 /*
Index: amd64/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
retrieving revision 1.19
diff -u -p -r1.19 trap.c
--- amd64/trap.c        28 Sep 2010 03:53:14 -0000      1.19
+++ amd64/trap.c        14 Oct 2010 06:36:47 -0000
@@ -150,10 +150,7 @@ trap(struct trapframe *frame)
        struct proc *p = curproc;
        int type = (int)frame->tf_trapno;
        struct pcb *pcb;
-       extern char doreti_iret[], resume_iret[], IDTVEC(oosyscall)[];
-#if 0
-       extern char resume_pop_ds[], resume_pop_es[];
-#endif
+       extern char doreti_iret[], resume_iret[];
        caddr_t onfault;
        int error;
        uint64_t cr2;
@@ -420,13 +417,6 @@ faultcommon:
        }
 
        case T_TRCTRAP:
-               /* Check whether they single-stepped into a lcall. */
-               if (frame->tf_rip == (register_t)IDTVEC(oosyscall))
-                       return;
-               if (frame->tf_rip == (register_t)IDTVEC(oosyscall) + 1) {
-                       frame->tf_rflags &= ~PSL_T;
-                       return;
-               }
                goto we_re_toast;
 
        case T_BPTFLT|T_USER:           /* bpt instruction fault */
Index: amd64/vm_machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vm_machdep.c,v
retrieving revision 1.22
diff -u -p -r1.22 vm_machdep.c
--- amd64/vm_machdep.c  9 Jun 2009 02:56:38 -0000       1.22
+++ amd64/vm_machdep.c  14 Oct 2010 06:36:47 -0000
@@ -107,7 +107,7 @@ cpu_fork(struct proc *p1, struct proc *p
        *pcb = p1->p_addr->u_pcb;
 
        /*
-        * Activate the address space.  Note this will refresh pcb_ldt_sel.
+        * Activate the address space.
         */
        pmap_activate(p2);

Reply via email to