This change relinks the kernel objects in random order, and unmaps the
bootcode component of locore during boot.  This makes gadgets harder
to find.

The current linker layout is:

        locore.o [bring-up code + asm runtime]
        rest of kernel .o, in order supplied by config(8)

The new linker layout is:

        locore.o [just the bring-up code]
        gap.o
        rest of kernel .o + new locore2.S [asm runtime], via sort -R

The gap.o being used requires some discussion.  It creates 5
random-sized gaps (see the generator sketch after the layout below):
        Few pages after locore.o .text
        <pagesize pad before other .text
        <pagesize pad before .rodata
        <pagesize pad before .data
        <pagesize pad before .bss

resulting in the following layout:

        boot code
        [few pages of gap]
endboot:
        [partial page gap]
        rest of .text - randomized order
        [page-alignment]
        [partial page gap]
        .rodata
        [page-alignment]
        [partial page gap]
        .data
        [partial page gap]
        .bss
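
How such a gap.S could be generated: a minimal sketch in shell (my
illustration only -- the real makegap.sh invoked by the Makefile hunks
below may differ; the commented-out echo line there shows the same
idea).  The ksh-style $RANDOM is expanded at kernel build time:

        #!/bin/sh
        # emit random-sized, 0xcc-filled (int3) padding between the
        # boot code and everything linked after it
        echo '#include <machine/asm.h>'
        echo '  .text'
        echo "  .space $((RANDOM * 3)), 0xcc"
        echo '  .align 4096, 0xcc'
        echo '  .globl endboot'
        echo '_C_LABEL(endboot):'
        echo "  .space $((4096 + RANDOM % 4096)), 0xcc"
        echo '  .align 16, 0xcc'

The three remaining sub-page pads (before .rodata, .data and .bss)
would be emitted the same way into their respective sections.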

When we supply the .o files to the linker in random order, their text
segments are placed in that random order.  The .rodata/.data/.bss for
each of those is also placed in the same order into their respective
sections.
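
The Makefile hunks below implement this.  Stripped to its essence, the
link step becomes something like the following sketch (variable names
as in the diff):

        # shuffle every object except locore.o and gap.o, which stay
        # first so the boot code lands at the start of .text
        echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | sort -R > lorder
        ${LD} ${LINKFLAGS} -o bsd locore.o gap.o `cat lorder`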

Once the system is booted, we unmap the locore.o bring-up code and the
first few pages of gap.  (This cannot happen too early; it must come
after "codepatch" has run.)

This bootcode is at a known location in KVA space.  At known offsets
within this .o's text, there are pointers into the other objects, in
particular to main().  By unmapping this bootcode, the addresses of
gadgets in the remaining objects become unknown: due to the
randomization, there is no text-segment knowledge left about where
those objects are.  Obviously some leakage of KVA addresses still
occurs, and cleanup will need to continue to ASLR more of those
objects.

There are a few mitigation strategies against the BROP attack
methodology.  One can be summarized as "never reuse an address space".
If a freshly linked kernel of this type were booted each time, we
would be well on the way to satisfying that.  Then other mitigation
efforts come into play.
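
As an idea sketch only (nothing in this diff implements it), that
could eventually mean relinking a fresh kernel from the saved objects
at each boot, roughly:

        # hypothetical boot-time relink; the directory, object-list
        # and linker-script names here are made up for illustration
        cd /usr/share/relink/kernel/GENERIC
        sh makegap.sh > gap.S && cc -c gap.S
        sort -R objlist > lorder
        ld -T ld.script -o newbsd locore.o gap.o `cat lorder` &&
            install newbsd /bsd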

I've booted around 100 amd64 kernels, so that is fairly well tested.
i386 hasn't been tested as much yet.

Index: arch/amd64/amd64/autoconf.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/autoconf.c,v
retrieving revision 1.47
diff -u -p -u -r1.47 autoconf.c
--- arch/amd64/amd64/autoconf.c 8 Jun 2016 17:24:44 -0000       1.47
+++ arch/amd64/amd64/autoconf.c 29 May 2017 13:04:34 -0000
@@ -59,6 +59,7 @@
 #include <sys/socketvar.h>
 #include <sys/timeout.h>
 #include <sys/hibernate.h>
+#include <uvm/uvm.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
@@ -105,6 +106,21 @@ void               aesni_setup(void);
 extern int     amd64_has_aesni;
 #endif
 
+void
+unmap_startup(void)
+{
+       extern void *kernel_text, *endboot;
+       vaddr_t p;
+
+       printf("unmap kernel init code %lx-%lx\n",
+           (vaddr_t)&kernel_text, (vaddr_t)&endboot);
+       p = (vaddr_t)&kernel_text;
+       do {
+               pmap_kremove(p, PAGE_SIZE);
+               p += NBPG;
+       } while (p < (vaddr_t)&endboot);
+}
+
 /*
  * Determine i/o configuration for a machine.
  */
@@ -122,6 +138,8 @@ cpu_configure(void)
        lapic_set_lvt();
        ioapic_enable();
 #endif
+
+       unmap_startup();
 
 #ifdef MULTIPROCESSOR
        cpu_init_idle_pcbs();
Index: arch/amd64/amd64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.84
diff -u -p -u -r1.84 locore.S
--- arch/amd64/amd64/locore.S   6 Feb 2017 09:15:51 -0000       1.84
+++ arch/amd64/amd64/locore.S   29 May 2017 20:52:28 -0000
@@ -131,115 +131,13 @@
 
 #include <machine/asm.h>
 
-#define SET_CURPROC(proc,cpu)                  \
-       movq    CPUVAR(SELF),cpu        ;       \
-       movq    proc,CPUVAR(CURPROC)      ;     \
-       movq    cpu,P_CPU(proc)
-
-#define GET_CURPCB(reg)                        movq    CPUVAR(CURPCB),reg      
-#define SET_CURPCB(reg)                        movq    reg,CPUVAR(CURPCB)
-
-
 /* XXX temporary kluge; these should not be here */
 /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
 #include <dev/isa/isareg.h>
 
-
-/*
- * Initialization
- */
-       .data
-
-#if NLAPIC > 0 
-       .align  NBPG, 0xcc
-       .globl _C_LABEL(local_apic), _C_LABEL(lapic_id), _C_LABEL(lapic_tpr)
-_C_LABEL(local_apic):
-       .space  LAPIC_ID
-_C_LABEL(lapic_id):
-       .long   0x00000000
-       .space  LAPIC_TPRI-(LAPIC_ID+4)
-_C_LABEL(lapic_tpr):
-       .space  LAPIC_PPRI-LAPIC_TPRI
-_C_LABEL(lapic_ppr):
-       .space  LAPIC_ISR-LAPIC_PPRI 
-_C_LABEL(lapic_isr):
-       .space  NBPG-LAPIC_ISR
-#endif
-
-       .globl  _C_LABEL(cpu_id),_C_LABEL(cpu_vendor)
-       .globl  _C_LABEL(cpuid_level),_C_LABEL(cpu_feature)
-       .globl  _C_LABEL(cpu_ebxfeature)
-       .globl  _C_LABEL(cpu_ecxfeature),_C_LABEL(ecpu_ecxfeature)
-       .globl  _C_LABEL(cpu_perf_eax)
-       .globl  _C_LABEL(cpu_perf_ebx)
-       .globl  _C_LABEL(cpu_perf_edx)
-       .globl  _C_LABEL(cpu_apmi_edx)
-       .globl  _C_LABEL(ssym),_C_LABEL(esym),_C_LABEL(boothowto)
-       .globl  _C_LABEL(bootdev)
-       .globl  _C_LABEL(bootinfo), _C_LABEL(bootinfo_size), _C_LABEL(atdevbase)
-       .globl  _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr)
-       .globl  _C_LABEL(biosbasemem),_C_LABEL(biosextmem)
-       .globl  _C_LABEL(bootapiver)
-       .globl  _C_LABEL(pg_nx)
-_C_LABEL(cpu_id):      .long   0       # saved from `cpuid' instruction
-_C_LABEL(cpu_feature): .long   0       # feature flags from 'cpuid'
-                                       #   instruction
-_C_LABEL(cpu_ebxfeature):.long 0       # ext. ebx feature flags from 'cpuid'
-_C_LABEL(cpu_ecxfeature):.long 0       # ext. ecx feature flags from 'cpuid'
-_C_LABEL(ecpu_ecxfeature):.long        0       # extended ecx feature flags
-_C_LABEL(cpu_perf_eax):        .long   0       # arch. perf. mon. flags from 'cpuid'
-_C_LABEL(cpu_perf_ebx):        .long   0       # arch. perf. mon. flags from 'cpuid'
-_C_LABEL(cpu_perf_edx):        .long   0       # arch. perf. mon. flags from 'cpuid'
-_C_LABEL(cpu_apmi_edx):        .long   0       # adv. power mgmt. info. from 'cpuid'
-_C_LABEL(cpuid_level): .long   -1      # max. level accepted by 'cpuid'
-                                       #   instruction
-_C_LABEL(cpu_vendor):  .space  16      # vendor string returned by `cpuid'
-                                       #   instruction
-_C_LABEL(ssym):                .quad   0       # ptr to start of syms
-_C_LABEL(esym):                .quad   0       # ptr to end of syms
-_C_LABEL(atdevbase):   .quad   0       # location of start of iomem in virtual
-_C_LABEL(bootapiver):  .long   0       # /boot API version
-_C_LABEL(bootdev):     .long   0       # device we booted from
-_C_LABEL(proc0paddr):  .quad   0
-_C_LABEL(PTDpaddr):    .quad   0       # paddr of PTD, for libkvm
-#ifndef REALBASEMEM
-_C_LABEL(biosbasemem): .long   0       # base memory reported by BIOS
-#else
-_C_LABEL(biosbasemem): .long   REALBASEMEM
-#endif
-#ifndef REALEXTMEM
-_C_LABEL(biosextmem):  .long   0       # extended memory reported by BIOS
-#else
-_C_LABEL(biosextmem):  .long   REALEXTMEM
-#endif
-_C_LABEL(pg_nx):       .quad   0       # NX PTE bit (if CPU supports)
-
 #define        _RELOC(x)       ((x) - KERNBASE)
 #define        RELOC(x)        _RELOC(_C_LABEL(x))
 
-       .globl  gdt64
-
-gdt64:
-       .word   gdt64_end-gdt64_start-1
-       .quad   _RELOC(gdt64_start)
-.align 64, 0xcc
-
-gdt64_start:
-       .quad 0x0000000000000000        /* always empty */
-       .quad 0x00af9a000000ffff        /* kernel CS */
-       .quad 0x00cf92000000ffff        /* kernel DS */
-gdt64_end:
-
-farjmp64:
-       .long   longmode-KERNBASE
-       .word   GSEL(GCODE_SEL, SEL_KPL)
-
-       .space 512
-tmpstk:
-
-       .globl _C_LABEL(cpu_private)
-       .comm _C_LABEL(cpu_private),NBPG,NBPG
-
 /*
  * Some hackage to deal with 64bit symbols in 32 bit mode.
  * This may not be needed if things are cleaned up a little.
@@ -748,475 +646,6 @@ longmode_hi:
 
        call    _C_LABEL(main)
 
-/*****************************************************************************/
-
-/*
- * Signal trampoline; copied to top of user stack.
- * gdb's backtrace logic matches against the instructions in this.
- */
-       .section .rodata
-       .globl  _C_LABEL(sigcode)
-_C_LABEL(sigcode):
-       call    *%rax
-
-       movq    %rsp,%rdi
-       pushq   %rdi                    /* fake return address */
-       movq    $SYS_sigreturn,%rax
-       syscall
-       .globl  _C_LABEL(sigcoderet)
-_C_LABEL(sigcoderet):
-       movq    $SYS_exit,%rax
-       syscall
-       .globl  _C_LABEL(esigcode)
-_C_LABEL(esigcode):
-
-       .globl  _C_LABEL(sigfill)
-_C_LABEL(sigfill):
-       int3
-_C_LABEL(esigfill):
-       .globl  _C_LABEL(sigfillsiz)
-_C_LABEL(sigfillsiz):
-       .long   _C_LABEL(esigfill) - _C_LABEL(sigfill)
-
-       .text
-/*
- * void lgdt(struct region_descriptor *rdp);
- * Change the global descriptor table.
- */
-NENTRY(lgdt)
-       /* Reload the descriptor table. */
-       movq    %rdi,%rax
-       lgdt    (%rax)
-       /* Flush the prefetch q. */
-       jmp     1f
-       nop
-1:     /* Reload "stale" selectors. */
-       movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
-       movl    %eax,%ds
-       movl    %eax,%es
-       movl    %eax,%ss
-       /* Reload code selector by doing intersegment return. */
-       popq    %rax
-       pushq   $GSEL(GCODE_SEL, SEL_KPL)
-       pushq   %rax
-       lretq
-
-ENTRY(setjmp)
-       /*
-        * Only save registers that must be preserved across function
-        * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
-        * and %rip.
-        */
-       movq    %rdi,%rax
-       movq    %rbx,(%rax)
-       movq    %rsp,8(%rax)
-       movq    %rbp,16(%rax)
-       movq    %r12,24(%rax)
-       movq    %r13,32(%rax)
-       movq    %r14,40(%rax)
-       movq    %r15,48(%rax)
-       movq    (%rsp),%rdx
-       movq    %rdx,56(%rax)
-       xorl    %eax,%eax
-       ret
-
-ENTRY(longjmp)
-       movq    %rdi,%rax
-       movq    (%rax),%rbx
-       movq    8(%rax),%rsp
-       movq    16(%rax),%rbp
-       movq    24(%rax),%r12
-       movq    32(%rax),%r13
-       movq    40(%rax),%r14
-       movq    48(%rax),%r15
-       movq    56(%rax),%rdx
-       movq    %rdx,(%rsp)
-       xorl    %eax,%eax
-       incl    %eax
-       ret
-
-/*****************************************************************************/
-
-/*
- * int cpu_switchto(struct proc *old, struct proc *new)
- * Switch from "old" proc to "new".
- */
-ENTRY(cpu_switchto)
-       pushq   %rbx
-       pushq   %rbp
-       pushq   %r12
-       pushq   %r13
-       pushq   %r14
-       pushq   %r15
-
-       movq    %rdi, %r13
-       movq    %rsi, %r12
-
-       /* Record new proc. */
-       movb    $SONPROC,P_STAT(%r12)   # p->p_stat = SONPROC
-       SET_CURPROC(%r12,%rcx)
-
-       movl    CPUVAR(CPUID),%edi
-
-       /* If old proc exited, don't bother. */
-       testq   %r13,%r13
-       jz      switch_exited
-
-       /*
-        * Save old context.
-        *
-        * Registers:
-        *   %rax, %rcx - scratch
-        *   %r13 - old proc, then old pcb
-        *   %r12 - new proc
-        *   %edi - cpuid
-        */
-
-       movq    P_ADDR(%r13),%r13
-
-       /* clear the old pmap's bit for the cpu */
-       movq    PCB_PMAP(%r13),%rcx
-       lock
-       btrq    %rdi,PM_CPUS(%rcx)
-
-       /* Save stack pointers. */
-       movq    %rsp,PCB_RSP(%r13)
-       movq    %rbp,PCB_RBP(%r13)
-
-switch_exited:
-       /* did old proc run in userspace?  then reset the segment regs */
-       btrl    $CPUF_USERSEGS_BIT, CPUVAR(FLAGS)
-       jnc     restore_saved
-
-       /* set %ds, %es, and %fs to expected value to prevent info leak */
-       movw    $(GSEL(GUDATA_SEL, SEL_UPL)),%ax
-       movw    %ax,%ds
-       movw    %ax,%es
-       movw    %ax,%fs
-
-restore_saved:
-       /*
-        * Restore saved context.
-        *
-        * Registers:
-        *   %rax, %rcx, %rdx - scratch
-        *   %r13 - new pcb
-        *   %r12 - new process
-        */
-
-       /* No interrupts while loading new state. */
-       cli
-       movq    P_ADDR(%r12),%r13
-
-       /* Restore stack pointers. */
-       movq    PCB_RSP(%r13),%rsp
-       movq    PCB_RBP(%r13),%rbp
-
-       movq    CPUVAR(TSS),%rcx
-       movq    PCB_KSTACK(%r13),%rdx
-       movq    %rdx,TSS_RSP0(%rcx)
-
-       movq    PCB_CR3(%r13),%rax
-       movq    %rax,%cr3
-
-       /* Don't bother with the rest if switching to a system process. */
-       testl   $P_SYSTEM,P_FLAG(%r12)
-       jnz     switch_restored
-
-       /* set the new pmap's bit for the cpu */
-       movl    CPUVAR(CPUID),%edi
-       movq    PCB_PMAP(%r13),%rcx
-       lock
-       btsq    %rdi,PM_CPUS(%rcx)
-#ifdef DIAGNOSTIC
-       jc      _C_LABEL(switch_pmcpu_set)
-#endif
-
-switch_restored:
-       /* Restore cr0 (including FPU state). */
-       movl    PCB_CR0(%r13),%ecx
-#ifdef MULTIPROCESSOR
-       movq    PCB_FPCPU(%r13),%r8
-       cmpq    CPUVAR(SELF),%r8
-       jz      1f
-       orl     $CR0_TS,%ecx
-1:
-#endif
-       movq    %rcx,%cr0
-
-       SET_CURPCB(%r13)
-
-       /* Interrupts are okay again. */
-       sti
-
-switch_return:
-
-       popq    %r15
-       popq    %r14
-       popq    %r13
-       popq    %r12
-       popq    %rbp
-       popq    %rbx
-       ret
-
-ENTRY(cpu_idle_enter)
-       movq    _C_LABEL(cpu_idle_enter_fcn),%rax
-       cmpq    $0,%rax
-       je      1f
-       jmpq    *%rax
-1:
-       ret
-
-ENTRY(cpu_idle_cycle)
-       movq    _C_LABEL(cpu_idle_cycle_fcn),%rax
-       cmpq    $0,%rax
-       je      1f
-       call    *%rax
-       ret
-1:
-       sti
-       hlt
-       ret
-
-ENTRY(cpu_idle_leave)
-       movq    _C_LABEL(cpu_idle_leave_fcn),%rax
-       cmpq    $0,%rax
-       je      1f
-       jmpq    *%rax
-1:
-       ret
-
-       .globl  _C_LABEL(panic)
-
-#ifdef DIAGNOSTIC
-NENTRY(switch_pmcpu_set)
-       movabsq $1f,%rdi
-       call    _C_LABEL(panic)
-       /* NOTREACHED */
-1:     .asciz  "activate already active pmap"
-#endif /* DIAGNOSTIC */
-
-/*
- * savectx(struct pcb *pcb);
- * Update pcb, saving current processor state.
- */
-ENTRY(savectx)
-       /* Save stack pointers. */
-       movq    %rsp,PCB_RSP(%rdi)
-       movq    %rbp,PCB_RBP(%rdi)
-
-       ret
-
-IDTVEC(syscall32)
-       sysret          /* go away please */
-
-/*
- * syscall insn entry. This currently isn't much faster, but
- * it can be made faster in the future.
- */
-IDTVEC(syscall)
-       /*
-        * Enter here with interrupts blocked; %rcx contains the caller's
-        * %rip and the original rflags has been copied to %r11.  %cs and
-        * %ss have been updated to the kernel segments, but %rsp is still
-        * the user-space value.
-        * First order of business is to swap to the kernel gs.base so that
-        * we can access our struct cpu_info and use the scratch space there
-        * to switch to our kernel stack.  Once that's in place we can
-        * unblock interrupts and save the rest of the syscall frame.
-        */
-       swapgs
-       movq    %r15,CPUVAR(SCRATCH)
-       movq    CPUVAR(CURPCB),%r15
-       movq    PCB_KSTACK(%r15),%r15
-       xchgq   %r15,%rsp
-       sti
-
-       /*
-        * XXX don't need this whole frame, split of the
-        * syscall frame and trapframe is needed.
-        * First, leave some room for the trapno, error,
-        * ss:rsp, etc, so that all GP registers can be
-        * saved. Then, fill in the rest.
-        */
-       pushq   $(GSEL(GUDATA_SEL, SEL_UPL))
-       pushq   %r15
-       subq    $(TF_RSP-TF_TRAPNO),%rsp
-       movq    CPUVAR(SCRATCH),%r15
-       subq    $32,%rsp
-       INTR_SAVE_GPRS
-       movq    %r11, TF_RFLAGS(%rsp)   /* old rflags from syscall insn */
-       movq    $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
-       movq    %rcx,TF_RIP(%rsp)
-       movq    $2,TF_ERR(%rsp)         /* ignored */
-
-       movq    CPUVAR(CURPROC),%r14
-       movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
-       andl    $~MDP_IRET,P_MD_FLAGS(%r14)
-       movq    %rsp,%rdi
-       call    _C_LABEL(syscall)
-
-.Lsyscall_check_asts:
-       /* Check for ASTs on exit to user mode. */
-       cli
-       CHECK_ASTPENDING(%r11)
-       je      2f
-       CLEAR_ASTPENDING(%r11)
-       sti
-       movq    %rsp,%rdi
-       call    _C_LABEL(ast)
-       jmp     .Lsyscall_check_asts
-
-2:
-#ifdef DIAGNOSTIC
-       cmpl    $IPL_NONE,CPUVAR(ILEVEL)
-       jne     .Lsyscall_spl_not_lowered
-#endif /* DIAGNOSTIC */
-
-       /* Could registers have been changed that require an iretq? */
-       testl   $MDP_IRET, P_MD_FLAGS(%r14)
-       jne     intr_fast_exit
-
-       movq    TF_RDI(%rsp),%rdi
-       movq    TF_RSI(%rsp),%rsi
-       movq    TF_R8(%rsp),%r8
-       movq    TF_R9(%rsp),%r9
-       movq    TF_R10(%rsp),%r10
-       movq    TF_R12(%rsp),%r12
-       movq    TF_R13(%rsp),%r13
-       movq    TF_R14(%rsp),%r14
-       movq    TF_R15(%rsp),%r15
-       movq    TF_RBP(%rsp),%rbp
-       movq    TF_RBX(%rsp),%rbx
-
-       INTR_RESTORE_SELECTORS
-
-       movq    TF_RDX(%rsp),%rdx
-       movq    TF_RAX(%rsp),%rax
-
-       movq    TF_RIP(%rsp),%rcx
-       movq    TF_RFLAGS(%rsp),%r11
-       movq    TF_RSP(%rsp),%rsp
-       sysretq
-
-#ifdef DIAGNOSTIC
-.Lsyscall_spl_not_lowered:
-       movabsq $4f, %rdi
-       movl    TF_RAX(%rsp),%esi
-       movl    TF_RDI(%rsp),%edx
-       movl    %ebx,%ecx
-       movl    CPUVAR(ILEVEL),%r8d
-       xorq    %rax,%rax
-       call    _C_LABEL(printf)
-#ifdef DDB
-       int     $3
-#endif /* DDB */
-       movl    $IPL_NONE,CPUVAR(ILEVEL)
-       jmp     .Lsyscall_check_asts
-4:     .asciz  "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
-#endif
-
-
-NENTRY(proc_trampoline)
-#ifdef MULTIPROCESSOR
-       call    _C_LABEL(proc_trampoline_mp)
-#endif
-       movl    $IPL_NONE,CPUVAR(ILEVEL)
-       movq    %r13,%rdi
-       call    *%r12
-       movq    CPUVAR(CURPROC),%r14
-       jmp     .Lsyscall_check_asts
-
-
-/*
- * Return via iretq, for real interrupts and signal returns
- */
-NENTRY(intr_fast_exit)
-       movq    TF_RDI(%rsp),%rdi
-       movq    TF_RSI(%rsp),%rsi
-       movq    TF_R8(%rsp),%r8
-       movq    TF_R9(%rsp),%r9
-       movq    TF_R10(%rsp),%r10
-       movq    TF_R12(%rsp),%r12
-       movq    TF_R13(%rsp),%r13
-       movq    TF_R14(%rsp),%r14
-       movq    TF_R15(%rsp),%r15
-       movq    TF_RBP(%rsp),%rbp
-       movq    TF_RBX(%rsp),%rbx
-
-       testq   $SEL_RPL,TF_CS(%rsp)
-       je      5f
-
-       INTR_RESTORE_SELECTORS
-
-5:     movq    TF_RDX(%rsp),%rdx
-       movq    TF_RCX(%rsp),%rcx
-       movq    TF_R11(%rsp),%r11
-       movq    TF_RAX(%rsp),%rax
-
-#if !defined(GPROF) && defined(DDBPROF)
-       /*
-        * If we are returning from a probe trap we need to fix the
-        * stack layout and emulate the patched instruction.
-        *
-        * The code below does that by trashing %rax, so it MUST be
-        * restored afterward.
-        */
-       cmpl    $INTR_FAKE_TRAP, TF_ERR(%rsp)
-       je      .Lprobe_fixup
-#endif /* !defined(GPROF) && defined(DDBPROF) */
-
-       addq    $TF_RIP,%rsp
-
-       .globl  _C_LABEL(doreti_iret)
-_C_LABEL(doreti_iret):
-       iretq
-
-
-#if !defined(GPROF) && defined(DDBPROF)
-.Lprobe_fixup:
-       /* Reserve enough room to emulate "pushq %rbp". */
-       subq    $16, %rsp
-
-       /* Shift hardware-saved registers. */
-       movq    (TF_RIP + 16)(%rsp), %rax
-       movq    %rax, TF_RIP(%rsp)
-       movq    (TF_CS + 16)(%rsp), %rax
-       movq    %rax, TF_CS(%rsp)
-       movq    (TF_RFLAGS + 16)(%rsp), %rax
-       movq    %rax, TF_RFLAGS(%rsp)
-       movq    (TF_RSP + 16)(%rsp), %rax
-       movq    %rax, TF_RSP(%rsp)
-       movq    (TF_SS + 16)(%rsp), %rax
-       movq    %rax, TF_SS(%rsp)
-
-       /* Pull 8 bytes off the stack and store %rbp in the expected location.*/
-       movq    TF_RSP(%rsp), %rax
-       subq    $8, %rax
-       movq    %rax, TF_RSP(%rsp)
-       movq    %rbp, (%rax)
-
-       /* Write back overwritten %rax */
-       movq    (TF_RAX + 16)(%rsp),%rax
-
-       addq    $TF_RIP,%rsp
-       iretq
-#endif /* !defined(GPROF) && defined(DDBPROF) */
-
-ENTRY(pagezero)
-       movq    $-PAGE_SIZE,%rdx
-       subq    %rdx,%rdi
-       xorq    %rax,%rax
-1:
-       movnti  %rax,(%rdi,%rdx)
-       movnti  %rax,8(%rdi,%rdx)
-       movnti  %rax,16(%rdi,%rdx)
-       movnti  %rax,24(%rdi,%rdx)
-       addq    $32,%rdx
-       jne     1b
-       sfence
-       ret
-
        .section .codepatch,"a"
        .align  8, 0xcc
        .globl _C_LABEL(codepatch_begin)
@@ -1228,20 +657,16 @@ _C_LABEL(codepatch_begin):
 _C_LABEL(codepatch_end):
        .previous
 
-#if NXEN > 0
-       /* Hypercall page needs to be page aligned */
-       .text
-       .align  NBPG, 0xcc
-       .globl  _C_LABEL(xen_hypercall_page)
-_C_LABEL(xen_hypercall_page):
-       .skip   0x1000, 0xcc
-#endif /* NXEN > 0 */
+       .data
+
+farjmp64:
+       .long   longmode-KERNBASE
+       .word   GSEL(GCODE_SEL, SEL_KPL)
+
+       .globl _C_LABEL(cpu_private)
+       .comm _C_LABEL(cpu_private),NBPG,NBPG
+
+/* XXX we want some guard here */
+       .space 512
+tmpstk:
 
-#if NHYPERV > 0
-       /* Hypercall page needs to be page aligned */
-       .text
-       .align  NBPG, 0xcc
-       .globl  _C_LABEL(hv_hypercall_page)
-_C_LABEL(hv_hypercall_page):
-       .skip   0x1000, 0xcc
-#endif /* NXEN > 0 */
Index: arch/amd64/amd64/locore2.S
===================================================================
RCS file: arch/amd64/amd64/locore2.S
diff -N arch/amd64/amd64/locore2.S
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ arch/amd64/amd64/locore2.S  29 May 2017 20:51:07 -0000
@@ -0,0 +1,721 @@
+/*     $OpenBSD: locore.S,v 1.84 2017/02/06 09:15:51 mpi Exp $ */
+/*     $NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $     */
+
+/*
+ * Copyright-o-rama!
+ */
+
+/*
+ * Copyright (c) 2001 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Frank van der Linden for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed for the NetBSD Project by
+ *      Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ *    or promote products derived from this software without specific prior
+ *    written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/*-
+ * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Charles M. Hannum.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)locore.s    7.3 (Berkeley) 5/13/91
+ */
+
+#include "assym.h"
+#include "lapic.h"
+#include "ksyms.h"
+#include "xen.h"
+#include "hyperv.h"
+
+#include <sys/syscall.h>
+
+#include <machine/param.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+#include <machine/trap.h>
+#include <machine/frameasm.h>
+
+#define SET_CURPROC(proc,cpu)                  \
+       movq    CPUVAR(SELF),cpu        ;       \
+       movq    proc,CPUVAR(CURPROC)      ;     \
+       movq    cpu,P_CPU(proc)
+
+#define GET_CURPCB(reg)                        movq    CPUVAR(CURPCB),reg      
+#define SET_CURPCB(reg)                        movq    reg,CPUVAR(CURPCB)
+
+#if NLAPIC > 0
+#include <machine/i82489reg.h>
+#endif
+
+/*
+ * override user-land alignment before including asm.h
+ */
+#define        ALIGN_DATA      .align  8
+#define ALIGN_TEXT     .align 16,0x90
+#define _ALIGN_TEXT    ALIGN_TEXT
+
+#include <machine/asm.h>
+
+
+
+/*
+ * Initialization
+ */
+       .data
+
+#if NLAPIC > 0 
+       .align  NBPG, 0xcc
+       .globl _C_LABEL(local_apic), _C_LABEL(lapic_id), _C_LABEL(lapic_tpr)
+_C_LABEL(local_apic):
+       .space  LAPIC_ID
+_C_LABEL(lapic_id):
+       .long   0x00000000
+       .space  LAPIC_TPRI-(LAPIC_ID+4)
+_C_LABEL(lapic_tpr):
+       .space  LAPIC_PPRI-LAPIC_TPRI
+_C_LABEL(lapic_ppr):
+       .space  LAPIC_ISR-LAPIC_PPRI 
+_C_LABEL(lapic_isr):
+       .space  NBPG-LAPIC_ISR
+#endif
+
+       .globl  _C_LABEL(cpu_id),_C_LABEL(cpu_vendor)
+       .globl  _C_LABEL(cpuid_level),_C_LABEL(cpu_feature)
+       .globl  _C_LABEL(cpu_ebxfeature)
+       .globl  _C_LABEL(cpu_ecxfeature),_C_LABEL(ecpu_ecxfeature)
+       .globl  _C_LABEL(cpu_perf_eax)
+       .globl  _C_LABEL(cpu_perf_ebx)
+       .globl  _C_LABEL(cpu_perf_edx)
+       .globl  _C_LABEL(cpu_apmi_edx)
+       .globl  _C_LABEL(ssym),_C_LABEL(esym),_C_LABEL(boothowto)
+       .globl  _C_LABEL(bootdev)
+       .globl  _C_LABEL(bootinfo), _C_LABEL(bootinfo_size), _C_LABEL(atdevbase)
+       .globl  _C_LABEL(proc0paddr),_C_LABEL(PTDpaddr)
+       .globl  _C_LABEL(biosbasemem),_C_LABEL(biosextmem)
+       .globl  _C_LABEL(bootapiver)
+       .globl  _C_LABEL(pg_nx)
+_C_LABEL(cpu_id):      .long   0       # saved from `cpuid' instruction
+_C_LABEL(cpu_feature): .long   0       # feature flags from 'cpuid'
+                                       #   instruction
+_C_LABEL(cpu_ebxfeature):.long 0       # ext. ebx feature flags from 'cpuid'
+_C_LABEL(cpu_ecxfeature):.long 0       # ext. ecx feature flags from 'cpuid'
+_C_LABEL(ecpu_ecxfeature):.long        0       # extended ecx feature flags
+_C_LABEL(cpu_perf_eax):        .long   0       # arch. perf. mon. flags from 'cpuid'
+_C_LABEL(cpu_perf_ebx):        .long   0       # arch. perf. mon. flags from 'cpuid'
+_C_LABEL(cpu_perf_edx):        .long   0       # arch. perf. mon. flags from 'cpuid'
+_C_LABEL(cpu_apmi_edx):        .long   0       # adv. power mgmt. info. from 'cpuid'
+_C_LABEL(cpuid_level): .long   -1      # max. level accepted by 'cpuid'
+                                       #   instruction
+_C_LABEL(cpu_vendor):  .space  16      # vendor string returned by `cpuid'
+                                       #   instruction
+_C_LABEL(ssym):                .quad   0       # ptr to start of syms
+_C_LABEL(esym):                .quad   0       # ptr to end of syms
+_C_LABEL(atdevbase):   .quad   0       # location of start of iomem in virtual
+_C_LABEL(bootapiver):  .long   0       # /boot API version
+_C_LABEL(bootdev):     .long   0       # device we booted from
+_C_LABEL(proc0paddr):  .quad   0
+_C_LABEL(PTDpaddr):    .quad   0       # paddr of PTD, for libkvm
+#ifndef REALBASEMEM
+_C_LABEL(biosbasemem): .long   0       # base memory reported by BIOS
+#else
+_C_LABEL(biosbasemem): .long   REALBASEMEM
+#endif
+#ifndef REALEXTMEM
+_C_LABEL(biosextmem):  .long   0       # extended memory reported by BIOS
+#else
+_C_LABEL(biosextmem):  .long   REALEXTMEM
+#endif
+_C_LABEL(pg_nx):       .quad   0       # NX PTE bit (if CPU supports)
+
+#define        _RELOC(x)       ((x) - KERNBASE)
+#define        RELOC(x)        _RELOC(_C_LABEL(x))
+
+       .globl  gdt64
+
+gdt64:
+       .word   gdt64_end-gdt64_start-1
+       .quad   _RELOC(gdt64_start)
+.align 64, 0xcc
+
+gdt64_start:
+       .quad 0x0000000000000000        /* always empty */
+       .quad 0x00af9a000000ffff        /* kernel CS */
+       .quad 0x00cf92000000ffff        /* kernel DS */
+gdt64_end:
+
+/*
+ * Some hackage to deal with 64bit symbols in 32 bit mode.
+ * This may not be needed if things are cleaned up a little.
+ */
+
+/*****************************************************************************/
+
+/*
+ * Signal trampoline; copied to top of user stack.
+ * gdb's backtrace logic matches against the instructions in this.
+ */
+       .section .rodata
+       .globl  _C_LABEL(sigcode)
+_C_LABEL(sigcode):
+       call    *%rax
+
+       movq    %rsp,%rdi
+       pushq   %rdi                    /* fake return address */
+       movq    $SYS_sigreturn,%rax
+       syscall
+       .globl  _C_LABEL(sigcoderet)
+_C_LABEL(sigcoderet):
+       movq    $SYS_exit,%rax
+       syscall
+       .globl  _C_LABEL(esigcode)
+_C_LABEL(esigcode):
+
+       .globl  _C_LABEL(sigfill)
+_C_LABEL(sigfill):
+       int3
+_C_LABEL(esigfill):
+       .globl  _C_LABEL(sigfillsiz)
+_C_LABEL(sigfillsiz):
+       .long   _C_LABEL(esigfill) - _C_LABEL(sigfill)
+
+       .text
+       ALIGN_TEXT
+
+/*
+ * void lgdt(struct region_descriptor *rdp);
+ * Change the global descriptor table.
+ */
+NENTRY(lgdt)
+       /* Reload the descriptor table. */
+       movq    %rdi,%rax
+       lgdt    (%rax)
+       /* Flush the prefetch q. */
+       jmp     1f
+       nop
+1:     /* Reload "stale" selectors. */
+       movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
+       movl    %eax,%ds
+       movl    %eax,%es
+       movl    %eax,%ss
+       /* Reload code selector by doing intersegment return. */
+       popq    %rax
+       pushq   $GSEL(GCODE_SEL, SEL_KPL)
+       pushq   %rax
+       lretq
+
+ENTRY(setjmp)
+       /*
+        * Only save registers that must be preserved across function
+        * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
+        * and %rip.
+        */
+       movq    %rdi,%rax
+       movq    %rbx,(%rax)
+       movq    %rsp,8(%rax)
+       movq    %rbp,16(%rax)
+       movq    %r12,24(%rax)
+       movq    %r13,32(%rax)
+       movq    %r14,40(%rax)
+       movq    %r15,48(%rax)
+       movq    (%rsp),%rdx
+       movq    %rdx,56(%rax)
+       xorl    %eax,%eax
+       ret
+
+ENTRY(longjmp)
+       movq    %rdi,%rax
+       movq    (%rax),%rbx
+       movq    8(%rax),%rsp
+       movq    16(%rax),%rbp
+       movq    24(%rax),%r12
+       movq    32(%rax),%r13
+       movq    40(%rax),%r14
+       movq    48(%rax),%r15
+       movq    56(%rax),%rdx
+       movq    %rdx,(%rsp)
+       xorl    %eax,%eax
+       incl    %eax
+       ret
+
+/*****************************************************************************/
+
+/*
+ * int cpu_switchto(struct proc *old, struct proc *new)
+ * Switch from "old" proc to "new".
+ */
+ENTRY(cpu_switchto)
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+
+       movq    %rdi, %r13
+       movq    %rsi, %r12
+
+       /* Record new proc. */
+       movb    $SONPROC,P_STAT(%r12)   # p->p_stat = SONPROC
+       SET_CURPROC(%r12,%rcx)
+
+       movl    CPUVAR(CPUID),%edi
+
+       /* If old proc exited, don't bother. */
+       testq   %r13,%r13
+       jz      switch_exited
+
+       /*
+        * Save old context.
+        *
+        * Registers:
+        *   %rax, %rcx - scratch
+        *   %r13 - old proc, then old pcb
+        *   %r12 - new proc
+        *   %edi - cpuid
+        */
+
+       movq    P_ADDR(%r13),%r13
+
+       /* clear the old pmap's bit for the cpu */
+       movq    PCB_PMAP(%r13),%rcx
+       lock
+       btrq    %rdi,PM_CPUS(%rcx)
+
+       /* Save stack pointers. */
+       movq    %rsp,PCB_RSP(%r13)
+       movq    %rbp,PCB_RBP(%r13)
+
+switch_exited:
+       /* did old proc run in userspace?  then reset the segment regs */
+       btrl    $CPUF_USERSEGS_BIT, CPUVAR(FLAGS)
+       jnc     restore_saved
+
+       /* set %ds, %es, and %fs to expected value to prevent info leak */
+       movw    $(GSEL(GUDATA_SEL, SEL_UPL)),%ax
+       movw    %ax,%ds
+       movw    %ax,%es
+       movw    %ax,%fs
+
+restore_saved:
+       /*
+        * Restore saved context.
+        *
+        * Registers:
+        *   %rax, %rcx, %rdx - scratch
+        *   %r13 - new pcb
+        *   %r12 - new process
+        */
+
+       /* No interrupts while loading new state. */
+       cli
+       movq    P_ADDR(%r12),%r13
+
+       /* Restore stack pointers. */
+       movq    PCB_RSP(%r13),%rsp
+       movq    PCB_RBP(%r13),%rbp
+
+       movq    CPUVAR(TSS),%rcx
+       movq    PCB_KSTACK(%r13),%rdx
+       movq    %rdx,TSS_RSP0(%rcx)
+
+       movq    PCB_CR3(%r13),%rax
+       movq    %rax,%cr3
+
+       /* Don't bother with the rest if switching to a system process. */
+       testl   $P_SYSTEM,P_FLAG(%r12)
+       jnz     switch_restored
+
+       /* set the new pmap's bit for the cpu */
+       movl    CPUVAR(CPUID),%edi
+       movq    PCB_PMAP(%r13),%rcx
+       lock
+       btsq    %rdi,PM_CPUS(%rcx)
+#ifdef DIAGNOSTIC
+       jc      _C_LABEL(switch_pmcpu_set)
+#endif
+
+switch_restored:
+       /* Restore cr0 (including FPU state). */
+       movl    PCB_CR0(%r13),%ecx
+#ifdef MULTIPROCESSOR
+       movq    PCB_FPCPU(%r13),%r8
+       cmpq    CPUVAR(SELF),%r8
+       jz      1f
+       orl     $CR0_TS,%ecx
+1:
+#endif
+       movq    %rcx,%cr0
+
+       SET_CURPCB(%r13)
+
+       /* Interrupts are okay again. */
+       sti
+
+switch_return:
+
+       popq    %r15
+       popq    %r14
+       popq    %r13
+       popq    %r12
+       popq    %rbp
+       popq    %rbx
+       ret
+
+ENTRY(cpu_idle_enter)
+       movq    _C_LABEL(cpu_idle_enter_fcn),%rax
+       cmpq    $0,%rax
+       je      1f
+       jmpq    *%rax
+1:
+       ret
+
+ENTRY(cpu_idle_cycle)
+       movq    _C_LABEL(cpu_idle_cycle_fcn),%rax
+       cmpq    $0,%rax
+       je      1f
+       call    *%rax
+       ret
+1:
+       sti
+       hlt
+       ret
+
+ENTRY(cpu_idle_leave)
+       movq    _C_LABEL(cpu_idle_leave_fcn),%rax
+       cmpq    $0,%rax
+       je      1f
+       jmpq    *%rax
+1:
+       ret
+
+       .globl  _C_LABEL(panic)
+
+#ifdef DIAGNOSTIC
+NENTRY(switch_pmcpu_set)
+       movabsq $1f,%rdi
+       call    _C_LABEL(panic)
+       /* NOTREACHED */
+1:     .asciz  "activate already active pmap"
+#endif /* DIAGNOSTIC */
+
+/*
+ * savectx(struct pcb *pcb);
+ * Update pcb, saving current processor state.
+ */
+ENTRY(savectx)
+       /* Save stack pointers. */
+       movq    %rsp,PCB_RSP(%rdi)
+       movq    %rbp,PCB_RBP(%rdi)
+
+       ret
+
+IDTVEC(syscall32)
+       sysret          /* go away please */
+
+/*
+ * syscall insn entry. This currently isn't much faster, but
+ * it can be made faster in the future.
+ */
+IDTVEC(syscall)
+       /*
+        * Enter here with interrupts blocked; %rcx contains the caller's
+        * %rip and the original rflags has been copied to %r11.  %cs and
+        * %ss have been updated to the kernel segments, but %rsp is still
+        * the user-space value.
+        * First order of business is to swap to the kernel gs.base so that
+        * we can access our struct cpu_info and use the scratch space there
+        * to switch to our kernel stack.  Once that's in place we can
+        * unblock interrupts and save the rest of the syscall frame.
+        */
+       swapgs
+       movq    %r15,CPUVAR(SCRATCH)
+       movq    CPUVAR(CURPCB),%r15
+       movq    PCB_KSTACK(%r15),%r15
+       xchgq   %r15,%rsp
+       sti
+
+       /*
+        * XXX don't need this whole frame, split of the
+        * syscall frame and trapframe is needed.
+        * First, leave some room for the trapno, error,
+        * ss:rsp, etc, so that all GP registers can be
+        * saved. Then, fill in the rest.
+        */
+       pushq   $(GSEL(GUDATA_SEL, SEL_UPL))
+       pushq   %r15
+       subq    $(TF_RSP-TF_TRAPNO),%rsp
+       movq    CPUVAR(SCRATCH),%r15
+       subq    $32,%rsp
+       INTR_SAVE_GPRS
+       movq    %r11, TF_RFLAGS(%rsp)   /* old rflags from syscall insn */
+       movq    $(GSEL(GUCODE_SEL, SEL_UPL)), TF_CS(%rsp)
+       movq    %rcx,TF_RIP(%rsp)
+       movq    $2,TF_ERR(%rsp)         /* ignored */
+
+       movq    CPUVAR(CURPROC),%r14
+       movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
+       andl    $~MDP_IRET,P_MD_FLAGS(%r14)
+       movq    %rsp,%rdi
+       call    _C_LABEL(syscall)
+
+.Lsyscall_check_asts:
+       /* Check for ASTs on exit to user mode. */
+       cli
+       CHECK_ASTPENDING(%r11)
+       je      2f
+       CLEAR_ASTPENDING(%r11)
+       sti
+       movq    %rsp,%rdi
+       call    _C_LABEL(ast)
+       jmp     .Lsyscall_check_asts
+
+2:
+#ifdef DIAGNOSTIC
+       cmpl    $IPL_NONE,CPUVAR(ILEVEL)
+       jne     .Lsyscall_spl_not_lowered
+#endif /* DIAGNOSTIC */
+
+       /* Could registers have been changed that require an iretq? */
+       testl   $MDP_IRET, P_MD_FLAGS(%r14)
+       jne     intr_fast_exit
+
+       movq    TF_RDI(%rsp),%rdi
+       movq    TF_RSI(%rsp),%rsi
+       movq    TF_R8(%rsp),%r8
+       movq    TF_R9(%rsp),%r9
+       movq    TF_R10(%rsp),%r10
+       movq    TF_R12(%rsp),%r12
+       movq    TF_R13(%rsp),%r13
+       movq    TF_R14(%rsp),%r14
+       movq    TF_R15(%rsp),%r15
+       movq    TF_RBP(%rsp),%rbp
+       movq    TF_RBX(%rsp),%rbx
+
+       INTR_RESTORE_SELECTORS
+
+       movq    TF_RDX(%rsp),%rdx
+       movq    TF_RAX(%rsp),%rax
+
+       movq    TF_RIP(%rsp),%rcx
+       movq    TF_RFLAGS(%rsp),%r11
+       movq    TF_RSP(%rsp),%rsp
+       sysretq
+
+#ifdef DIAGNOSTIC
+.Lsyscall_spl_not_lowered:
+       movabsq $4f, %rdi
+       movl    TF_RAX(%rsp),%esi
+       movl    TF_RDI(%rsp),%edx
+       movl    %ebx,%ecx
+       movl    CPUVAR(ILEVEL),%r8d
+       xorq    %rax,%rax
+       call    _C_LABEL(printf)
+#ifdef DDB
+       int     $3
+#endif /* DDB */
+       movl    $IPL_NONE,CPUVAR(ILEVEL)
+       jmp     .Lsyscall_check_asts
+4:     .asciz  "WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
+#endif
+
+
+NENTRY(proc_trampoline)
+#ifdef MULTIPROCESSOR
+       call    _C_LABEL(proc_trampoline_mp)
+#endif
+       movl    $IPL_NONE,CPUVAR(ILEVEL)
+       movq    %r13,%rdi
+       call    *%r12
+       movq    CPUVAR(CURPROC),%r14
+       jmp     .Lsyscall_check_asts
+
+
+/*
+ * Return via iretq, for real interrupts and signal returns
+ */
+NENTRY(intr_fast_exit)
+       movq    TF_RDI(%rsp),%rdi
+       movq    TF_RSI(%rsp),%rsi
+       movq    TF_R8(%rsp),%r8
+       movq    TF_R9(%rsp),%r9
+       movq    TF_R10(%rsp),%r10
+       movq    TF_R12(%rsp),%r12
+       movq    TF_R13(%rsp),%r13
+       movq    TF_R14(%rsp),%r14
+       movq    TF_R15(%rsp),%r15
+       movq    TF_RBP(%rsp),%rbp
+       movq    TF_RBX(%rsp),%rbx
+
+       testq   $SEL_RPL,TF_CS(%rsp)
+       je      5f
+
+       INTR_RESTORE_SELECTORS
+
+5:     movq    TF_RDX(%rsp),%rdx
+       movq    TF_RCX(%rsp),%rcx
+       movq    TF_R11(%rsp),%r11
+       movq    TF_RAX(%rsp),%rax
+
+#if !defined(GPROF) && defined(DDBPROF)
+       /*
+        * If we are returning from a probe trap we need to fix the
+        * stack layout and emulate the patched instruction.
+        *
+        * The code below does that by trashing %rax, so it MUST be
+        * restored afterward.
+        */
+       cmpl    $INTR_FAKE_TRAP, TF_ERR(%rsp)
+       je      .Lprobe_fixup
+#endif /* !defined(GPROF) && defined(DDBPROF) */
+
+       addq    $TF_RIP,%rsp
+
+       .globl  _C_LABEL(doreti_iret)
+_C_LABEL(doreti_iret):
+       iretq
+
+
+#if !defined(GPROF) && defined(DDBPROF)
+.Lprobe_fixup:
+       /* Reserve enough room to emulate "pushq %rbp". */
+       subq    $16, %rsp
+
+       /* Shift hardware-saved registers. */
+       movq    (TF_RIP + 16)(%rsp), %rax
+       movq    %rax, TF_RIP(%rsp)
+       movq    (TF_CS + 16)(%rsp), %rax
+       movq    %rax, TF_CS(%rsp)
+       movq    (TF_RFLAGS + 16)(%rsp), %rax
+       movq    %rax, TF_RFLAGS(%rsp)
+       movq    (TF_RSP + 16)(%rsp), %rax
+       movq    %rax, TF_RSP(%rsp)
+       movq    (TF_SS + 16)(%rsp), %rax
+       movq    %rax, TF_SS(%rsp)
+
+       /* Pull 8 bytes off the stack and store %rbp in the expected location.*/
+       movq    TF_RSP(%rsp), %rax
+       subq    $8, %rax
+       movq    %rax, TF_RSP(%rsp)
+       movq    %rbp, (%rax)
+
+       /* Write back overwritten %rax */
+       movq    (TF_RAX + 16)(%rsp),%rax
+
+       addq    $TF_RIP,%rsp
+       iretq
+#endif /* !defined(GPROF) && defined(DDBPROF) */
+
+ENTRY(pagezero)
+       movq    $-PAGE_SIZE,%rdx
+       subq    %rdx,%rdi
+       xorq    %rax,%rax
+1:
+       movnti  %rax,(%rdi,%rdx)
+       movnti  %rax,8(%rdi,%rdx)
+       movnti  %rax,16(%rdi,%rdx)
+       movnti  %rax,24(%rdi,%rdx)
+       addq    $32,%rdx
+       jne     1b
+       sfence
+       ret
+
+#if NXEN > 0
+       /* Hypercall page needs to be page aligned */
+       .text
+       .align  NBPG, 0xcc
+       .globl  _C_LABEL(xen_hypercall_page)
+_C_LABEL(xen_hypercall_page):
+       .skip   0x1000, 0xcc
+#endif /* NXEN > 0 */
+
+#if NHYPERV > 0
+       /* Hypercall page needs to be page aligned */
+       .text
+       .align  NBPG, 0xcc
+       .globl  _C_LABEL(hv_hypercall_page)
+_C_LABEL(hv_hypercall_page):
+       .skip   0x1000, 0xcc
+#endif /* NHYPERV > 0 */
Index: arch/amd64/conf/Makefile.amd64
===================================================================
RCS file: /cvs/src/sys/arch/amd64/conf/Makefile.amd64,v
retrieving revision 1.76
diff -u -p -u -r1.76 Makefile.amd64
--- arch/amd64/conf/Makefile.amd64      8 May 2017 00:13:38 -0000       1.76
+++ arch/amd64/conf/Makefile.amd64      30 May 2017 07:28:14 -0000
@@ -30,6 +30,7 @@ CWARNFLAGS=   -Werror -Wall -Wimplicit-fun
 CMACHFLAGS=    -mcmodel=kernel -mno-red-zone -mno-sse2 -mno-sse -mno-3dnow \
                -mno-mmx -msoft-float -fno-omit-frame-pointer
 CMACHFLAGS+=   -ffreestanding ${NOPIE_FLAGS}
+SORTR=         sort -R
 .if ${IDENT:M-DNO_PROPOLICE}
 CMACHFLAGS+=   -fno-stack-protector
 .endif
@@ -38,6 +39,7 @@ CMACHFLAGS+=  -msave-args
 .endif
 .if ${IDENT:M-DSMALL_KERNEL}
 CMACHFLAGS+=   -Wa,-n
+SORTR=         cat
 .endif
 
 DEBUG?=                -g
@@ -73,12 +75,13 @@ NORMAL_S=   ${CC} ${AFLAGS} ${CPPFLAGS} -c
 #      ${SYSTEM_LD_HEAD}
 #      ${SYSTEM_LD} swapxxx.o
 #      ${SYSTEM_LD_TAIL}
-SYSTEM_HEAD=   locore.o param.o ioconf.o
-SYSTEM_OBJ=    ${SYSTEM_HEAD} ${OBJS}
+SYSTEM_HEAD=   locore.o gap.o
+SYSTEM_OBJ=    ${SYSTEM_HEAD} ${OBJS} param.o ioconf.o
 SYSTEM_DEP=    Makefile ${SYSTEM_OBJ} ${LDSCRIPT}
 SYSTEM_LD_HEAD=        @rm -f $@
 SYSTEM_LD=     @echo ${LD} ${LINKFLAGS} -o $@ '$${SYSTEM_HEAD} vers.o $${OBJS}'; \
-               ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} vers.o ${OBJS}
+               echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | ${SORTR} > lorder; \
+               ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} `cat lorder`
 SYSTEM_LD_TAIL=        @${SIZE} $@; chmod 755 $@
 
 .if ${DEBUG} == "-g"
@@ -122,8 +125,16 @@ vers.o: ${SYSTEM_DEP} ${SYSTEM_SWAP_DEP}
        sh $S/conf/newvers.sh
        ${CC} ${CFLAGS} ${CPPFLAGS} ${PROF} -c vers.c
 
+gap.S: ${SYSTEM_SWAP_DEP} Makefile
+       #echo "#include <machine/asm.h>\n\t.text\n\t.space $$RANDOM*3,0xcc\n\t.align 4096,0xcc\n\t.globl endboot\n_C_LABEL(endboot):\n\t.space 4096+$$RANDOM%4096,0xcc\n\t.align 16,0xcc" > gap.S
+       sh $S/conf/makegap.sh > gap.S
+
+gap.o: gap.S
+       ${CC} ${AFLAGS} ${CPPFLAGS} ${PROF} -c gap.S
+
 clean:
-       rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} param.c
+       rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} \
+           gap.S lorder param.c
 
 cleandir: clean
        rm -f Makefile *.h ioconf.c options machine ${_mach} vers.c
@@ -136,7 +147,8 @@ db_structinfo.h: $S/ddb/db_structinfo.c 
        rm -f db_structinfo.o
 
 locore.o: ${_machdir}/${_mach}/locore.S assym.h
-mutex.o vector.o copy.o spl.o mptramp.o acpi_wakecode.o vmm_support.o: assym.h
+locore2.o mutex.o vector.o copy.o spl.o: assym.h
+mptramp.o acpi_wakecode.o vmm_support.o: assym.h
 
 # The install target can be redefined by putting a
 # install-kernel-${MACHINE_NAME} target into /etc/mk.conf
Index: arch/amd64/conf/files.amd64
===================================================================
RCS file: /cvs/src/sys/arch/amd64/conf/files.amd64,v
retrieving revision 1.88
diff -u -p -u -r1.88 files.amd64
--- arch/amd64/conf/files.amd64 30 Apr 2017 13:04:49 -0000      1.88
+++ arch/amd64/conf/files.amd64 28 May 2017 13:19:03 -0000
@@ -11,6 +11,7 @@ file  arch/amd64/amd64/machdep.c
 file   arch/amd64/amd64/hibernate_machdep.c    hibernate
 file   arch/amd64/amd64/identcpu.c
 file   arch/amd64/amd64/via.c
+file   arch/amd64/amd64/locore2.S
 file   arch/amd64/amd64/aes_intel.S            crypto
 file   arch/amd64/amd64/aesni.c                crypto
 file   arch/amd64/amd64/amd64errata.c
Index: arch/i386/conf/Makefile.i386
===================================================================
RCS file: /cvs/src/sys/arch/i386/conf/Makefile.i386,v
retrieving revision 1.103
diff -u -p -u -r1.103 Makefile.i386
--- arch/i386/conf/Makefile.i386        28 May 2017 13:20:37 -0000      1.103
+++ arch/i386/conf/Makefile.i386        30 May 2017 07:26:53 -0000
@@ -29,9 +29,13 @@ CWARNFLAGS=  -Werror -Wall -Wimplicit-fun
 
 CMACHFLAGS=
 CMACHFLAGS+=   -ffreestanding ${NOPIE_FLAGS}
+SORTR=         sort -R
 .if ${IDENT:M-DNO_PROPOLICE}
 CMACHFLAGS+=   -fno-stack-protector
 .endif
+.if ${IDENT:M-DSMALL_KERNEL}
+SORTR=         cat
+.endif
 
 DEBUG?=                -g
 COPTS?=                -O2
@@ -72,12 +76,13 @@ NORMAL_S=   ${CC} ${AFLAGS} ${CPPFLAGS} -c
 #      ${SYSTEM_LD_HEAD}
 #      ${SYSTEM_LD} swapxxx.o
 #      ${SYSTEM_LD_TAIL}
-SYSTEM_HEAD=   locore.o param.o ioconf.o
-SYSTEM_OBJ=    ${SYSTEM_HEAD} ${OBJS}
+SYSTEM_HEAD=   locore.o gap.o
+SYSTEM_OBJ=    ${SYSTEM_HEAD} ${OBJS} param.o ioconf.o
 SYSTEM_DEP=    Makefile ${SYSTEM_OBJ} ${LDSCRIPT}
 SYSTEM_LD_HEAD=        @rm -f $@
 SYSTEM_LD=     @echo ${LD} ${LINKFLAGS} -o $@ '$${SYSTEM_HEAD} vers.o $${OBJS}'; \
-               ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} vers.o ${OBJS}
+               echo ${OBJS} param.o ioconf.o vers.o | tr " " "\n" | ${SORTR} > lorder; \
+               ${LD} ${LINKFLAGS} -o $@ ${SYSTEM_HEAD} `cat lorder`
 SYSTEM_LD_TAIL=        @${SIZE} $@; chmod 755 $@
 
 .if ${DEBUG} == "-g"
@@ -120,6 +125,13 @@ ioconf.o: ioconf.c
 vers.o: ${SYSTEM_DEP} ${SYSTEM_SWAP_DEP}
        sh $S/conf/newvers.sh
        ${CC} ${CFLAGS} ${CPPFLAGS} ${PROF} -c vers.c
+
+gap.S: ${SYSTEM_SWAP_DEP} Makefile
+       #echo "#include <machine/asm.h>\n\t.text\n\t.space $$RANDOM*3,0xcc\n\t.align 4096,0xcc\n\t.globl endboot\n_C_LABEL(endboot):\n\t.space 4096+$$RANDOM%4096,0xcc\n\t.align 16,0xcc" > gap.S
+       sh $S/conf/makegap.sh > gap.S
+
+gap.o: gap.S
+       ${CC} ${AFLAGS} ${CPPFLAGS} ${PROF} -c gap.S
 
 clean:
        rm -f *bsd *bsd.gdb *.[dio] [a-z]*.s assym.* ${DB_STRUCTINFO} param.c
Index: arch/i386/conf/files.i386
===================================================================
RCS file: /cvs/src/sys/arch/i386/conf/files.i386,v
retrieving revision 1.232
diff -u -p -u -r1.232 files.i386
--- arch/i386/conf/files.i386   30 Apr 2017 13:04:49 -0000      1.232
+++ arch/i386/conf/files.i386   29 May 2017 12:27:14 -0000
@@ -23,6 +23,7 @@ file  arch/i386/i386/in_cksum.s
 file   arch/i386/i386/machdep.c
 file   arch/i386/i386/hibernate_machdep.c hibernate
 file   arch/i386/i386/via.c
+file   arch/i386/i386/locore2.S
 file   arch/i386/i386/amd64errata.c    !small_kernel
 file   arch/i386/i386/longrun.c        !small_kernel
 file   arch/i386/i386/mem.c
Index: arch/i386/i386/autoconf.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/autoconf.c,v
retrieving revision 1.101
diff -u -p -u -r1.101 autoconf.c
--- arch/i386/i386/autoconf.c   8 Jun 2016 17:24:44 -0000       1.101
+++ arch/i386/i386/autoconf.c   29 May 2017 13:07:46 -0000
@@ -109,6 +109,21 @@ void               viac3_crypto_setup(void);
 extern int     i386_has_xcrypt;
 #endif
 
+void
+unmap_startup(void)
+{
+       extern void *kernel_text, *endboot;
+       vaddr_t p;
+
+       printf("unmap kernel init code %lx-%lx\n",
+           (vaddr_t)&kernel_text, (vaddr_t)&endboot);
+       p = (vaddr_t)&kernel_text;
+       do {
+               pmap_kremove(p, PAGE_SIZE);
+               p += NBPG;
+       } while (p < (vaddr_t)&endboot);
+}
+
 /*
  * Determine i/o configuration for a machine.
  */
@@ -154,6 +169,8 @@ cpu_configure(void)
 
        proc0.p_addr->u_pcb.pcb_cr0 = rcr0();
 
+       unmap_startup();
+
 #ifdef MULTIPROCESSOR
        /* propagate TSS configuration to the idle pcb's. */
        cpu_init_idle_pcbs();
@@ -165,6 +182,7 @@ cpu_configure(void)
         * until we can checksum blocks to figure it out.
         */
        cold = 0;
+
 
        /*
         * At this point the RNG is running, and if FSXR is set we can
Index: arch/i386/i386/locore.s
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/locore.s,v
retrieving revision 1.173
diff -u -p -u -r1.173 locore.s
--- arch/i386/i386/locore.s     12 May 2017 08:46:28 -0000      1.173
+++ arch/i386/i386/locore.s     30 May 2017 07:53:26 -0000
@@ -55,206 +55,20 @@
 
 #include <dev/isa/isareg.h>
 
-#if NLAPIC > 0
-#include <machine/i82489reg.h>
-#endif
-
-#ifndef SMALL_KERNEL
-/*
- * As stac/clac SMAP instructions are 3 bytes, we want the fastest
- * 3 byte nop sequence possible here.  This will be replaced by
- * stac/clac instructions if SMAP is detected after booting.
- *
- * Intel documents multi-byte NOP sequences as being available
- * on all family 0x6 and 0xf processors (ie 686+)
- * So use 3 of the single byte nops for compatibility
- */
-#define SMAP_NOP       .byte 0x90, 0x90, 0x90
-#define SMAP_STAC      CODEPATCH_START                 ;\
-                       SMAP_NOP                        ;\
-                       CODEPATCH_END(CPTAG_STAC)
-#define SMAP_CLAC      CODEPATCH_START                 ;\
-                       SMAP_NOP                        ;\
-                       CODEPATCH_END(CPTAG_CLAC)
-
-#else
-
-#define SMAP_STAC
-#define SMAP_CLAC
-
-#endif
-
-
 /*
  * override user-land alignment before including asm.h
  */
 
 #define        ALIGN_DATA      .align  4
 #define        ALIGN_TEXT      .align  4,0x90  /* 4-byte boundaries, NOP-filled */
-#define        SUPERALIGN_TEXT .align  16,0x90 /* 16-byte boundaries better for 486 */
 #define _ALIGN_TEXT    ALIGN_TEXT
 #include <machine/asm.h>
 
-#define CPL _C_LABEL(lapic_tpr)
-
-#define        GET_CURPCB(reg)                                 \
-       movl    CPUVAR(CURPCB), reg
-
-#define        CHECK_ASTPENDING(treg)                          \
-       movl    CPUVAR(CURPROC),treg            ;       \
-       cmpl    $0, treg                        ;       \
-       je      1f                              ;       \
-       cmpl    $0,P_MD_ASTPENDING(treg)        ;       \
-       1:
-
-#define        CLEAR_ASTPENDING(cpreg)                         \
-       movl    $0,P_MD_ASTPENDING(cpreg)
-
-/*
- * These are used on interrupt or trap entry or exit.
- */
-#define        INTRENTRY \
-       cld                     ; \
-       pushl   %eax            ; \
-       pushl   %ecx            ; \
-       pushl   %edx            ; \
-       pushl   %ebx            ; \
-       pushl   %ebp            ; \
-       pushl   %esi            ; \
-       pushl   %edi            ; \
-       pushl   %ds             ; \
-       pushl   %es             ; \
-       pushl   %gs             ; \
-       movl    $GSEL(GDATA_SEL, SEL_KPL),%eax  ; \
-       movw    %ax,%ds         ; \
-       movw    %ax,%es         ; \
-       xorl    %eax,%eax       ; /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ \
-       movw    %ax,%gs         ; \
-       pushl   %fs             ; \
-       movl    $GSEL(GCPU_SEL, SEL_KPL),%eax   ; \
-       movw    %ax,%fs
-
-#define        INTR_RESTORE_ALL \
-       popl    %fs             ; \
-       popl    %gs             ; \
-       popl    %es             ; \
-       popl    %ds             ; \
-       popl    %edi            ; \
-       popl    %esi            ; \
-       popl    %ebp            ; \
-       popl    %ebx            ; \
-       popl    %edx            ; \
-       popl    %ecx            ; \
-       popl    %eax
-
-#define        INTRFASTEXIT \
-       INTR_RESTORE_ALL        ;\
-       addl    $8,%esp         ; \
-       iret
-
-#define        INTR_FAKE_TRAP  0xbadabada
-
-/*
- * PTmap is recursive pagemap at top of virtual address space.
- * Within PTmap, the page directory can be found (third indirection).
- */
-       .globl  _C_LABEL(PTmap), _C_LABEL(PTD), _C_LABEL(PTDpde)
-       .set    _C_LABEL(PTmap), (PDSLOT_PTE << PDSHIFT)
-       .set    _C_LABEL(PTD), (_C_LABEL(PTmap) + PDSLOT_PTE * NBPG)
-       .set    _C_LABEL(PTDpde), (_C_LABEL(PTD) + PDSLOT_PTE * 4)      # XXX 4 == sizeof pde
-
-/*
- * APTmap, APTD is the alternate recursive pagemap.
- * It's used when modifying another process's page tables.
- */
-       .globl  _C_LABEL(APTmap), _C_LABEL(APTD), _C_LABEL(APTDpde)
-       .set    _C_LABEL(APTmap), (PDSLOT_APTE << PDSHIFT)
-       .set    _C_LABEL(APTD), (_C_LABEL(APTmap) + PDSLOT_APTE * NBPG)
-       # XXX 4 == sizeof pde
-       .set    _C_LABEL(APTDpde), (_C_LABEL(PTD) + PDSLOT_APTE * 4)
-
 /*
  * Initialization
  */
        .data
 
-       .globl  _C_LABEL(cpu), _C_LABEL(cpu_id), _C_LABEL(cpu_vendor)
-       .globl  _C_LABEL(cpu_brandstr)
-       .globl  _C_LABEL(cpuid_level)
-       .globl  _C_LABEL(cpu_miscinfo)
-       .globl  _C_LABEL(cpu_feature), _C_LABEL(cpu_ecxfeature)
-       .globl  _C_LABEL(ecpu_feature), _C_LABEL(ecpu_eaxfeature)
-       .globl  _C_LABEL(ecpu_ecxfeature)
-       .globl  _C_LABEL(cpu_cache_eax), _C_LABEL(cpu_cache_ebx)
-       .globl  _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx)
-       .globl  _C_LABEL(cpu_perf_eax)
-       .globl  _C_LABEL(cpu_perf_ebx)
-       .globl  _C_LABEL(cpu_perf_edx)
-       .globl  _C_LABEL(cpu_apmi_edx)
-       .globl  _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem)
-       .globl  _C_LABEL(cpu_pae)
-       .globl  _C_LABEL(esym)
-       .globl  _C_LABEL(ssym)
-       .globl  _C_LABEL(nkptp_max)
-       .globl  _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase)
-       .globl  _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr), _C_LABEL(PTDsize)
-       .globl  _C_LABEL(gdt)
-       .globl  _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv)
-       .globl  _C_LABEL(lapic_tpr)
-
-#if NLAPIC > 0
-       .align NBPG
-       .globl _C_LABEL(local_apic), _C_LABEL(lapic_id)
-_C_LABEL(local_apic):
-       .space  LAPIC_ID
-_C_LABEL(lapic_id):
-       .long   0x00000000
-       .space  LAPIC_TPRI-(LAPIC_ID+4)
-_C_LABEL(lapic_tpr):
-       .space  LAPIC_PPRI-LAPIC_TPRI
-_C_LABEL(lapic_ppr):
-       .space  LAPIC_ISR-LAPIC_PPRI
-_C_LABEL(lapic_isr):
-       .space  NBPG-LAPIC_ISR
-#else
-_C_LABEL(lapic_tpr):
-       .long   0
-#endif
-
-_C_LABEL(cpu):         .long   0       # are we 386, 386sx, 486, 586 or 686
-_C_LABEL(cpu_id):      .long   0       # saved from 'cpuid' instruction
-_C_LABEL(cpu_pae):     .long   0       # are we using PAE paging mode?
-_C_LABEL(cpu_miscinfo):        .long   0       # misc info (apic/brand id) from 'cpuid'
-_C_LABEL(cpu_feature): .long   0       # feature flags from 'cpuid' instruction
-_C_LABEL(ecpu_feature): .long  0       # extended feature flags from 'cpuid'
-_C_LABEL(cpu_ecxfeature):.long 0       # ecx feature flags from 'cpuid'
-_C_LABEL(ecpu_eaxfeature): .long 0     # extended eax feature flags
-_C_LABEL(ecpu_ecxfeature): .long 0     # extended ecx feature flags
-_C_LABEL(cpuid_level): .long   -1      # max. lvl accepted by 'cpuid' insn
-_C_LABEL(cpu_cache_eax):.long  0
-_C_LABEL(cpu_cache_ebx):.long  0
-_C_LABEL(cpu_cache_ecx):.long  0
-_C_LABEL(cpu_cache_edx):.long  0
-_C_LABEL(cpu_perf_eax):        .long   0       # arch. perf. mon. flags from 'cpuid'
-_C_LABEL(cpu_perf_ebx):        .long   0       # arch. perf. mon. flags from 'cpuid'
-_C_LABEL(cpu_perf_edx):        .long   0       # arch. perf. mon. flags from 'cpuid'
-_C_LABEL(cpu_apmi_edx):        .long   0       # adv. power management info. 'cpuid'
-_C_LABEL(cpu_vendor): .space 16        # vendor string returned by 'cpuid' instruction
-_C_LABEL(cpu_brandstr):        .space 48 # brand string returned by 'cpuid'
-_C_LABEL(cold):                .long   1       # cold till we are not
-_C_LABEL(ssym):                .long   0       # ptr to start of syms
-_C_LABEL(esym):                .long   0       # ptr to end of syms
-_C_LABEL(cnvmem):      .long   0       # conventional memory size
-_C_LABEL(extmem):      .long   0       # extended memory size
-_C_LABEL(atdevbase):   .long   0       # location of start of iomem in virtual
-_C_LABEL(bootapiver):  .long   0       # /boot API version
-_C_LABEL(bootargc):    .long   0       # /boot argc
-_C_LABEL(bootargv):    .long   0       # /boot argv
-_C_LABEL(bootdev):     .long   0       # device we booted from
-_C_LABEL(proc0paddr):  .long   0
-_C_LABEL(PTDpaddr):    .long   0       # paddr of PTD, for libkvm
-_C_LABEL(PTDsize):     .long   NBPG    # size of PTD, for libkvm
-
        .space 512
 tmpstk:
 
@@ -666,1092 +480,3 @@ begin:
 
        call    _C_LABEL(main)
        /* NOTREACHED */
-
-NENTRY(proc_trampoline)
-#ifdef MULTIPROCESSOR
-       call    _C_LABEL(proc_trampoline_mp)
-#endif
-       movl    $IPL_NONE,CPL
-       pushl   %ebx
-       call    *%esi
-       addl    $4,%esp
-       INTRFASTEXIT
-       /* NOTREACHED */
-
-       /* This must come before any use of the CODEPATCH macros */
-       .section .codepatch,"a"
-       .align  8
-       .globl _C_LABEL(codepatch_begin)
-_C_LABEL(codepatch_begin):
-       .previous
-
-       .section .codepatchend,"a"
-       .globl _C_LABEL(codepatch_end)
-_C_LABEL(codepatch_end):
-       .previous
-
-/*****************************************************************************/
-
-/*
- * Signal trampoline; copied to top of user stack.
- */
-       .section .rodata
-       .globl  _C_LABEL(sigcode)
-_C_LABEL(sigcode):
-       call    *SIGF_HANDLER(%esp)
-       leal    SIGF_SC(%esp),%eax      # scp (the call may have clobbered the
-                                       # copy at SIGF_SCP(%esp))
-       pushl   %eax
-       pushl   %eax                    # junk to fake return address
-       movl    $SYS_sigreturn,%eax
-       int     $0x80                   # enter kernel with args on stack
-       .globl  _C_LABEL(sigcoderet)
-_C_LABEL(sigcoderet):
-       movl    $SYS_exit,%eax
-       int     $0x80                   # exit if sigreturn fails
-       .globl  _C_LABEL(esigcode)
-_C_LABEL(esigcode):
-
-       .globl  _C_LABEL(sigfill)
-_C_LABEL(sigfill):
-       int3
-_C_LABEL(esigfill):
-
-       .data
-       .globl  _C_LABEL(sigfillsiz)
-_C_LABEL(sigfillsiz):
-       .long   _C_LABEL(esigfill) - _C_LABEL(sigfill)
-
-       .text
-
-/*****************************************************************************/
-
-/*
- * The following primitives are used to fill and copy regions of memory.
- */
-
-/* Frame pointer reserve on stack. */
-#ifdef DDB
-#define FPADD 4
-#else
-#define FPADD 0
-#endif
-
-/*
- * kcopy(caddr_t from, caddr_t to, size_t len);
- * Copy len bytes, abort on fault.
- */
-ENTRY(kcopy)
-#ifdef DDB
-       pushl   %ebp
-       movl    %esp,%ebp
-#endif
-       pushl   %esi
-       pushl   %edi
-       GET_CURPCB(%eax)                # load curpcb into eax and set on-fault
-       pushl   PCB_ONFAULT(%eax)
-       movl    $_C_LABEL(copy_fault), PCB_ONFAULT(%eax)
-
-       movl    16+FPADD(%esp),%esi
-       movl    20+FPADD(%esp),%edi
-       movl    24+FPADD(%esp),%ecx
-       movl    %edi,%eax
-       subl    %esi,%eax
-       cmpl    %ecx,%eax               # overlapping?
-       jb      1f
-       shrl    $2,%ecx                 # nope, copy forward by 32-bit words
-       rep
-       movsl
-       movl    24+FPADD(%esp),%ecx
-       andl    $3,%ecx                 # any bytes left?
-       rep
-       movsb
-
-       GET_CURPCB(%edx)                # XXX save curpcb?
-       popl    PCB_ONFAULT(%edx)
-       popl    %edi
-       popl    %esi
-       xorl    %eax,%eax
-#ifdef DDB
-       leave
-#endif
-       ret
-
-       ALIGN_TEXT
-1:     addl    %ecx,%edi               # copy backward
-       addl    %ecx,%esi
-       std
-       andl    $3,%ecx                 # any fractional bytes?
-       decl    %edi
-       decl    %esi
-       rep
-       movsb
-       movl    24+FPADD(%esp),%ecx     # copy remainder by 32-bit words
-       shrl    $2,%ecx
-       subl    $3,%esi
-       subl    $3,%edi
-       rep
-       movsl
-       cld
-
-       GET_CURPCB(%edx)
-       popl    PCB_ONFAULT(%edx)
-       popl    %edi
-       popl    %esi
-       xorl    %eax,%eax
-#ifdef DDB
-       leave
-#endif
-       ret
-       
-/*****************************************************************************/
-
-/*
- * The following primitives are used to copy data in and out of the user's
- * address space.
- */
-
-/*
- * copyout(caddr_t from, caddr_t to, size_t len);
- * Copy len bytes into the user's address space.
- */
-ENTRY(copyout)
-#ifdef DDB
-       pushl   %ebp
-       movl    %esp,%ebp
-#endif
-       pushl   %esi
-       pushl   %edi
-       pushl   $0      
-       
-       movl    16+FPADD(%esp),%esi
-       movl    20+FPADD(%esp),%edi
-       movl    24+FPADD(%esp),%eax
-
-       /*
-        * We check that the end of the destination buffer is not past the end
-        * of the user's address space.  If it's not, then we only need to
-        * check that each page is writable.  The 486 will do this for us; the
-        * 386 will not.  (We assume that pages in user space that are not
-        * writable by the user are not writable by the kernel either.)
-        */
-       movl    %edi,%edx
-       addl    %eax,%edx
-       jc      _C_LABEL(copy_fault)
-       cmpl    $VM_MAXUSER_ADDRESS,%edx
-       ja      _C_LABEL(copy_fault)
-
-       GET_CURPCB(%edx)
-       movl    $_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
-       SMAP_STAC
-
-       /* bcopy(%esi, %edi, %eax); */
-       movl    %eax,%ecx
-       shrl    $2,%ecx
-       rep
-       movsl
-       movl    %eax,%ecx
-       andl    $3,%ecx
-       rep
-       movsb
-
-       SMAP_CLAC
-       popl    PCB_ONFAULT(%edx)
-       popl    %edi
-       popl    %esi
-       xorl    %eax,%eax
-#ifdef DDB
-       leave
-#endif
-       ret
-
-/*
- * copyin(caddr_t from, caddr_t to, size_t len);
- * Copy len bytes from the user's address space.
- */
-ENTRY(copyin)
-#ifdef DDB
-       pushl   %ebp
-       movl    %esp,%ebp
-#endif
-       pushl   %esi
-       pushl   %edi
-       GET_CURPCB(%eax)
-       pushl   $0
-       movl    $_C_LABEL(copy_fault),PCB_ONFAULT(%eax)
-       SMAP_STAC
-       
-       movl    16+FPADD(%esp),%esi
-       movl    20+FPADD(%esp),%edi
-       movl    24+FPADD(%esp),%eax
-
-       /*
-        * We check that the end of the destination buffer is not past the end
-        * of the user's address space.  If it's not, then we only need to
-        * check that each page is readable, and the CPU will do that for us.
-        */
-       movl    %esi,%edx
-       addl    %eax,%edx
-       jc      _C_LABEL(copy_fault)
-       cmpl    $VM_MAXUSER_ADDRESS,%edx
-       ja      _C_LABEL(copy_fault)
-
-       /* bcopy(%esi, %edi, %eax); */
-       movl    %eax,%ecx
-       shrl    $2,%ecx
-       rep
-       movsl
-       movb    %al,%cl
-       andb    $3,%cl
-       rep
-       movsb
-
-       SMAP_CLAC
-       GET_CURPCB(%edx)
-       popl    PCB_ONFAULT(%edx)
-       popl    %edi
-       popl    %esi
-       xorl    %eax,%eax
-#ifdef DDB
-       leave
-#endif
-       ret
-
-ENTRY(copy_fault)
-       SMAP_CLAC
-       GET_CURPCB(%edx)
-       popl    PCB_ONFAULT(%edx)
-       popl    %edi
-       popl    %esi
-       movl    $EFAULT,%eax
-#ifdef DDB
-       leave
-#endif
-       ret
-
-/*
- * copyoutstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
- * Copy a NUL-terminated string, at most maxlen characters long, into the
- * user's address space.  Return the number of characters copied (including the
- * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
- * return 0 or EFAULT.
- */
-ENTRY(copyoutstr)
-#ifdef DDB
-       pushl   %ebp
-       movl    %esp,%ebp
-#endif
-       pushl   %esi
-       pushl   %edi
-
-       movl    12+FPADD(%esp),%esi             # esi = from
-       movl    16+FPADD(%esp),%edi             # edi = to
-       movl    20+FPADD(%esp),%edx             # edx = maxlen
-
-5:     GET_CURPCB(%eax)
-       movl    $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax)
-       SMAP_STAC
-       /*
-        * Get min(%edx, VM_MAXUSER_ADDRESS-%edi).
-        */
-       movl    $VM_MAXUSER_ADDRESS,%eax
-       subl    %edi,%eax
-       jbe     _C_LABEL(copystr_fault)         # die if CF == 1 || ZF == 1
-                                               # i.e. make sure that %edi
-                                               # is below VM_MAXUSER_ADDRESS
-
-       cmpl    %edx,%eax
-       jae     1f
-       movl    %eax,%edx
-       movl    %eax,20+FPADD(%esp)
-
-1:     incl    %edx
-
-1:     decl    %edx
-       jz      2f
-       lodsb
-       stosb
-       testb   %al,%al
-       jnz     1b
-
-       /* Success -- 0 byte reached. */
-       decl    %edx
-       xorl    %eax,%eax
-       jmp     copystr_return
-
-2:     /* edx is zero -- return EFAULT or ENAMETOOLONG. */
-       cmpl    $VM_MAXUSER_ADDRESS,%edi
-       jae     _C_LABEL(copystr_fault)
-       movl    $ENAMETOOLONG,%eax
-       jmp     copystr_return
-
-/*
- * copyinstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
- * Copy a NUL-terminated string, at most maxlen characters long, from the
- * user's address space.  Return the number of characters copied (including the
- * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
- * return 0 or EFAULT.
- */
-ENTRY(copyinstr)
-#ifdef DDB
-       pushl   %ebp
-       movl    %esp,%ebp
-#endif
-       pushl   %esi
-       pushl   %edi
-       GET_CURPCB(%ecx)
-       movl    $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx)
-       SMAP_STAC
-
-       movl    12+FPADD(%esp),%esi             # %esi = from
-       movl    16+FPADD(%esp),%edi             # %edi = to
-       movl    20+FPADD(%esp),%edx             # %edx = maxlen
-
-       /*
-        * Get min(%edx, VM_MAXUSER_ADDRESS-%esi).
-        */
-       movl    $VM_MAXUSER_ADDRESS,%eax
-       subl    %esi,%eax
-       jbe     _C_LABEL(copystr_fault)         # Error if CF == 1 || ZF == 1
-                                               # i.e. make sure that %esi
-                                               # is below VM_MAXUSER_ADDRESS
-       cmpl    %edx,%eax
-       jae     1f
-       movl    %eax,%edx
-       movl    %eax,20+FPADD(%esp)
-
-1:     incl    %edx
-
-1:     decl    %edx
-       jz      2f
-       lodsb
-       stosb
-       testb   %al,%al
-       jnz     1b
-
-       /* Success -- 0 byte reached. */
-       decl    %edx
-       xorl    %eax,%eax
-       jmp     copystr_return
-
-2:     /* edx is zero -- return EFAULT or ENAMETOOLONG. */
-       cmpl    $VM_MAXUSER_ADDRESS,%esi
-       jae     _C_LABEL(copystr_fault)
-       movl    $ENAMETOOLONG,%eax
-       jmp     copystr_return
-
-ENTRY(copystr_fault)
-       movl    $EFAULT,%eax
-
-copystr_return:
-       SMAP_CLAC
-       /* Set *lencopied and return %eax. */
-       GET_CURPCB(%ecx)
-       movl    $0,PCB_ONFAULT(%ecx)
-       movl    20+FPADD(%esp),%ecx
-       subl    %edx,%ecx
-       movl    24+FPADD(%esp),%edx
-       testl   %edx,%edx
-       jz      8f
-       movl    %ecx,(%edx)
-
-8:     popl    %edi
-       popl    %esi
-#ifdef DDB
-       leave
-#endif
-       ret
-
-/*
- * copystr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
- * Copy a NUL-terminated string, at most maxlen characters long.  Return the
- * number of characters copied (including the NUL) in *lencopied.  If the
- * string is too long, return ENAMETOOLONG; else return 0.
- */
-ENTRY(copystr)
-#ifdef DDB
-       pushl   %ebp
-       movl    %esp,%ebp
-#endif
-       pushl   %esi
-       pushl   %edi
-
-       movl    12+FPADD(%esp),%esi             # esi = from
-       movl    16+FPADD(%esp),%edi             # edi = to
-       movl    20+FPADD(%esp),%edx             # edx = maxlen
-       incl    %edx
-
-1:     decl    %edx
-       jz      4f
-       lodsb
-       stosb
-       testb   %al,%al
-       jnz     1b
-
-       /* Success -- 0 byte reached. */
-       decl    %edx
-       xorl    %eax,%eax
-       jmp     6f
-
-4:     /* edx is zero -- return ENAMETOOLONG. */
-       movl    $ENAMETOOLONG,%eax
-
-6:     /* Set *lencopied and return %eax. */
-       movl    20+FPADD(%esp),%ecx
-       subl    %edx,%ecx
-       movl    24+FPADD(%esp),%edx
-       testl   %edx,%edx
-       jz      7f
-       movl    %ecx,(%edx)
-
-7:     popl    %edi
-       popl    %esi
-#ifdef DDB
-       leave
-#endif
-       ret
-
-/*****************************************************************************/
-
-/*
- * The following is i386-specific nonsense.
- */
-
-/*
- * void lgdt(struct region_descriptor *rdp);
- * Change the global descriptor table.
- */
-NENTRY(lgdt)
-       /* Reload the descriptor table. */
-       movl    4(%esp),%eax
-       lgdt    (%eax)
-       /* Flush the prefetch q. */
-       jmp     1f
-       nop
-1:     /* Reload "stale" selectors. */
-       movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
-       movw    %ax,%ds
-       movw    %ax,%es
-       movw    %ax,%ss
-       movl    $GSEL(GCPU_SEL, SEL_KPL),%eax
-       movw    %ax,%fs
-       /* Reload code selector by doing intersegment return. */
-       popl    %eax
-       pushl   $GSEL(GCODE_SEL, SEL_KPL)
-       pushl   %eax
-       lret
-
-ENTRY(setjmp)
-       movl    4(%esp),%eax
-       movl    %ebx,(%eax)             # save ebx
-       movl    %esp,4(%eax)            # save esp
-       movl    %ebp,8(%eax)            # save ebp
-       movl    %esi,12(%eax)           # save esi
-       movl    %edi,16(%eax)           # save edi
-       movl    (%esp),%edx             # get rta
-       movl    %edx,20(%eax)           # save eip
-       xorl    %eax,%eax               # return (0);
-       ret
-
-ENTRY(longjmp)
-       movl    4(%esp),%eax
-       movl    (%eax),%ebx             # restore ebx
-       movl    4(%eax),%esp            # restore esp
-       movl    8(%eax),%ebp            # restore ebp
-       movl    12(%eax),%esi           # restore esi
-       movl    16(%eax),%edi           # restore edi
-       movl    20(%eax),%edx           # get rta
-       movl    %edx,(%esp)             # put in return frame
-       xorl    %eax,%eax               # return (1);
-       incl    %eax
-       ret
-
-/*****************************************************************************/
-               
-/*
- * cpu_switchto(struct proc *old, struct proc *new)
- * Switch from the "old" proc to the "new" proc. If "old" is NULL, we
- * don't need to bother saving old context.
- */
-ENTRY(cpu_switchto)
-       pushl   %ebx
-       pushl   %esi
-       pushl   %edi
-
-       movl    16(%esp), %esi
-       movl    20(%esp), %edi
-
-       /* If old process exited, don't bother. */
-       testl   %esi,%esi
-       jz      switch_exited
-
-       /* Save old stack pointers. */
-       movl    P_ADDR(%esi),%ebx
-       movl    %esp,PCB_ESP(%ebx)
-       movl    %ebp,PCB_EBP(%ebx)
-
-switch_exited:
-       /* Restore saved context. */
-
-       /* No interrupts while loading new state. */
-       cli
-
-       /* Record new process. */
-       movl    %edi, CPUVAR(CURPROC)
-       movb    $SONPROC, P_STAT(%edi)
-
-       /* Restore stack pointers. */
-       movl    P_ADDR(%edi),%ebx
-       movl    PCB_ESP(%ebx),%esp
-       movl    PCB_EBP(%ebx),%ebp
-
-       /* Record new pcb. */
-       movl    %ebx, CPUVAR(CURPCB)
-
-       /*
-        * Activate the address space.  The pcb copy of %cr3 will
-        * be refreshed from the pmap, and because we're
-        * curproc they'll both be reloaded into the CPU.
-        */
-       pushl   %edi
-       pushl   %esi
-       call    _C_LABEL(pmap_switch)
-       addl    $8,%esp
-
-       /* Load TSS info. */
-       movl    CPUVAR(GDT),%eax
-       movl    P_MD_TSS_SEL(%edi),%edx
-
-       /* Switch TSS. */
-       andl    $~0x0200,4-SEL_KPL(%eax,%edx,1)
-       ltr     %dx
-
-       /* Restore cr0 (including FPU state). */
-       movl    PCB_CR0(%ebx),%ecx
-#ifdef MULTIPROCESSOR
-       /*
-        * If our floating point registers are on a different CPU,
-        * clear CR0_TS so we'll trap rather than reuse bogus state.
-        */
-       movl    CPUVAR(SELF), %esi
-       cmpl    PCB_FPCPU(%ebx), %esi
-       jz      1f
-       orl     $CR0_TS,%ecx
-1:     
-#endif 
-       movl    %ecx,%cr0
-
-       /* Interrupts are okay again. */
-       sti
-
-       popl    %edi
-       popl    %esi
-       popl    %ebx
-       ret
-
-ENTRY(cpu_idle_enter)
-       movl    _C_LABEL(cpu_idle_enter_fcn),%eax
-       cmpl    $0,%eax
-       je      1f
-       jmpl    *%eax
-1:
-       ret
-
-ENTRY(cpu_idle_cycle)
-       movl    _C_LABEL(cpu_idle_cycle_fcn),%eax
-       cmpl    $0,%eax
-       je      1f
-       call    *%eax
-       ret
-1:
-       sti
-       hlt
-       ret
-
-ENTRY(cpu_idle_leave)
-       movl    _C_LABEL(cpu_idle_leave_fcn),%eax
-       cmpl    $0,%eax
-       je      1f
-       jmpl    *%eax
-1:
-       ret
-
-/*
- * savectx(struct pcb *pcb);
- * Update pcb, saving current processor state.
- */
-ENTRY(savectx)
-       movl    4(%esp),%edx            # edx = p->p_addr
-
-       /* Save stack pointers. */
-       movl    %esp,PCB_ESP(%edx)
-       movl    %ebp,PCB_EBP(%edx)
-
-       movl    PCB_FLAGS(%edx),%ecx
-       orl     $PCB_SAVECTX,%ecx
-       movl    %ecx,PCB_FLAGS(%edx)
-
-       ret
-
-/*****************************************************************************/
-
-/*
- * Trap and fault vector routines
- *
- * On exit from the kernel to user mode, we always need to check for ASTs.  In
- * addition, we need to do this atomically; otherwise an interrupt may occur
- * which causes an AST, but it won't get processed until the next kernel entry
- * (possibly the next clock tick).  Thus, we disable interrupt before checking,
- * and only enable them again on the final `iret' or before calling the AST
- * handler.
- */
-#define        IDTVEC(name)    ALIGN_TEXT; .globl X##name; X##name:
-
-#define        TRAP(a)         pushl $(a) ; jmp _C_LABEL(alltraps)
-#define        ZTRAP(a)        pushl $0 ; TRAP(a)
-
-
-       .text
-IDTVEC(div)
-       ZTRAP(T_DIVIDE)
-IDTVEC(dbg)
-       subl    $4,%esp
-       pushl   %eax
-       movl    %dr6,%eax
-       movl    %eax,4(%esp)
-       andb    $~0xf,%al
-       movl    %eax,%dr6
-       popl    %eax
-       TRAP(T_TRCTRAP)
-IDTVEC(nmi)
-       ZTRAP(T_NMI)
-IDTVEC(bpt)
-       ZTRAP(T_BPTFLT)
-IDTVEC(ofl)
-       ZTRAP(T_OFLOW)
-IDTVEC(bnd)
-       ZTRAP(T_BOUND)
-IDTVEC(ill)
-       ZTRAP(T_PRIVINFLT)
-IDTVEC(dna)
-#if NNPX > 0
-       pushl   $0                      # dummy error code
-       pushl   $T_DNA
-       INTRENTRY
-#ifdef MULTIPROCESSOR
-       pushl   CPUVAR(SELF)
-#else
-       pushl   $_C_LABEL(cpu_info_primary)
-#endif
-       call    *_C_LABEL(npxdna_func)
-       addl    $4,%esp
-       testl   %eax,%eax
-       jz      calltrap
-       INTRFASTEXIT
-#else
-       ZTRAP(T_DNA)
-#endif
-IDTVEC(dble)
-       TRAP(T_DOUBLEFLT)
-IDTVEC(fpusegm)
-       ZTRAP(T_FPOPFLT)
-IDTVEC(tss)
-       TRAP(T_TSSFLT)
-IDTVEC(missing)
-       TRAP(T_SEGNPFLT)
-IDTVEC(stk)
-       TRAP(T_STKFLT)
-IDTVEC(prot)
-       TRAP(T_PROTFLT)
-IDTVEC(f00f_redirect)
-       pushl   $T_PAGEFLT
-       INTRENTRY
-       testb   $PGEX_U,TF_ERR(%esp)
-       jnz     calltrap
-       movl    %cr2,%eax
-       subl    _C_LABEL(idt),%eax
-       cmpl    $(6*8),%eax
-       jne     calltrap
-       movb    $T_PRIVINFLT,TF_TRAPNO(%esp)
-       jmp     calltrap
-IDTVEC(page)
-       TRAP(T_PAGEFLT)
-IDTVEC(rsvd)
-       ZTRAP(T_RESERVED)
-IDTVEC(mchk)
-       ZTRAP(T_MACHK)
-IDTVEC(simd)
-       ZTRAP(T_XFTRAP)
-IDTVEC(intrspurious)
-       /*
-        * The Pentium Pro local APIC may erroneously call this vector for a
-        * default IR7.  Just ignore it.
-        *
-        * (The local APIC does this when CPL is raised while it's on the
-        * way to delivering an interrupt.. presumably enough has been set
-        * up that it's inconvenient to abort delivery completely..)
-        */
-       iret
-IDTVEC(fpu)
-#if NNPX > 0
-       /*
-        * Handle like an interrupt so that we can call npxintr to clear the
-        * error.  It would be better to handle npx interrupts as traps but
-        * this is difficult for nested interrupts.
-        */
-       subl    $8,%esp                 /* space for tf_{err,trapno} */
-       INTRENTRY
-       pushl   CPL                     # if_ppl in intrframe
-       pushl   %esp                    # push address of intrframe
-       incl    _C_LABEL(uvmexp)+V_TRAP
-       call    _C_LABEL(npxintr)
-       addl    $8,%esp                 # pop address and if_ppl
-       INTRFASTEXIT
-#else
-       ZTRAP(T_ARITHTRAP)
-#endif
-IDTVEC(align)
-       ZTRAP(T_ALIGNFLT)
-       /* 18 - 31 reserved for future exp */
-
-/*
- * If an error is detected during trap, syscall, or interrupt exit, trap() will
- * change %eip to point to one of these labels.  We clean up the stack, if
- * necessary, and resume as if we were handling a general protection fault.
- * This will cause the process to get a SIGBUS.
- */
-NENTRY(resume_iret)
-       ZTRAP(T_PROTFLT)
-NENTRY(resume_pop_ds)
-       pushl   %es
-       movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
-       movw    %ax,%es
-NENTRY(resume_pop_es)
-       pushl   %gs
-       xorl    %eax,%eax       /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */
-       movw    %ax,%gs
-NENTRY(resume_pop_gs)
-       pushl   %fs
-       movl    $GSEL(GCPU_SEL, SEL_KPL),%eax
-       movw    %ax,%fs
-NENTRY(resume_pop_fs)
-       movl    $T_PROTFLT,TF_TRAPNO(%esp)
-       sti
-       jmp     calltrap
-
-/*
- * All traps go through here. Call the generic trap handler, and
- * check for ASTs afterwards.
- */
-NENTRY(alltraps)
-       INTRENTRY
-       sti
-calltrap:
-#ifdef DIAGNOSTIC
-       movl    CPL,%ebx
-#endif /* DIAGNOSTIC */
-#if !defined(GPROF) && defined(DDBPROF)
-       cmpl    $T_BPTFLT,TF_TRAPNO(%esp)
-       jne     .Lreal_trap
-
-       pushl   %esp
-       call    _C_LABEL(db_prof_hook)
-       addl    $4,%esp
-       cmpl    $1,%eax
-       jne     .Lreal_trap
-
-       /*
-        * Abuse the error field to indicate that INTRFASTEXIT needs
-        * to emulate the patched instruction.
-        */
-       movl    $INTR_FAKE_TRAP, TF_ERR(%esp)
-       jz      2f
-.Lreal_trap:
-#endif /* !defined(GPROF) && defined(DDBPROF) */
-       pushl   %esp
-       call    _C_LABEL(trap)
-       addl    $4,%esp
-2:     /* Check for ASTs on exit to user mode. */
-       cli
-       CHECK_ASTPENDING(%ecx)
-       je      1f
-       testb   $SEL_RPL,TF_CS(%esp)
-#ifdef VM86
-       jnz     5f
-       testl   $PSL_VM,TF_EFLAGS(%esp)
-#endif
-       jz      1f
-5:     CLEAR_ASTPENDING(%ecx)
-       sti
-       pushl   %esp
-       call    _C_LABEL(ast)
-       addl    $4,%esp
-       jmp     2b
-1:
-#if !defined(GPROF) && defined(DDBPROF)
-       /*
-        * If we are returning from a probe trap we need to fix the
-        * stack layout and emulate the patched instruction.
-        *
-        * The code below does that by trashing %eax, so it MUST be
-        * restored afterward.
-        */
-       cmpl    $INTR_FAKE_TRAP, TF_ERR(%esp)
-       je      .Lprobe_fixup
-#endif /* !defined(GPROF) && defined(DDBPROF) */
-#ifndef DIAGNOSTIC
-       INTRFASTEXIT
-#else
-       cmpl    CPL,%ebx
-       jne     3f
-       INTRFASTEXIT
-3:     sti
-       pushl   $4f
-       call    _C_LABEL(printf)
-       addl    $4,%esp
-#if defined(DDB) && 0
-       int     $3
-#endif /* DDB */
-       movl    %ebx,CPL
-       jmp     2b
-4:     .asciz  "WARNING: SPL NOT LOWERED ON TRAP EXIT\n"
-#endif /* DIAGNOSTIC */
-
-#if !defined(GPROF) && defined(DDBPROF)
-.Lprobe_fixup:
-       /* Restore all register unwinding the stack. */
-       INTR_RESTORE_ALL
-
-       /*
-        * Use the space left by ``err'' and ``trapno'' to emulate
-        * "pushl %ebp".
-        *
-        * Temporarily save %eax.
-        */
-       movl    %eax,0(%esp)
-
-       /* Shift hardware-saved registers: eip, cs, eflags */
-       movl    8(%esp),%eax
-       movl    %eax,4(%esp)
-       movl    12(%esp),%eax
-       movl    %eax,8(%esp)
-       movl    16(%esp),%eax
-       movl    %eax,12(%esp)
-
-       /* Store %ebp in the expected location to finish the emulation. */
-       movl    %ebp,16(%esp)
-
-       popl    %eax
-       iret
-#endif /* !defined(GPROF) && defined(DDBPROF) */
-/*
- * Trap gate entry for syscall
- */
-IDTVEC(syscall)
-       subl    $8,%esp                 /* space for tf_{err,trapno} */
-       INTRENTRY
-       pushl   %esp
-       call    _C_LABEL(syscall)
-       addl    $4,%esp
-2:     /* Check for ASTs on exit to user mode. */
-       cli
-       CHECK_ASTPENDING(%ecx)
-       je      1f
-       /* Always returning to user mode here. */
-       CLEAR_ASTPENDING(%ecx)
-       sti
-       pushl   %esp
-       call    _C_LABEL(ast)
-       addl    $4,%esp
-       jmp     2b
-1:     INTRFASTEXIT
-
-#include <i386/i386/vector.s>
-#include <i386/isa/icu.s>
-
-/*
- * bzero (void *b, size_t len)
- *     write len zero bytes to the string b.
- */
-
-ENTRY(bzero)
-       pushl   %edi
-       movl    8(%esp),%edi
-       movl    12(%esp),%edx
-
-       xorl    %eax,%eax               /* set fill data to 0 */
-
-       /*
-        * if the string is too short, it's really not worth the overhead
-        * of aligning to word boundaries, etc.  So we jump to a plain
-        * unaligned set.
-        */
-       cmpl    $16,%edx
-       jb      7f
-
-       movl    %edi,%ecx               /* compute misalignment */
-       negl    %ecx
-       andl    $3,%ecx
-       subl    %ecx,%edx
-       rep                             /* zero until word aligned */
-       stosb
-
-       cmpl    $CPUCLASS_486,_C_LABEL(cpu_class)
-       jne     8f
-
-       movl    %edx,%ecx
-       shrl    $6,%ecx
-       jz      8f
-       andl    $63,%edx
-1:     movl    %eax,(%edi)
-       movl    %eax,4(%edi)
-       movl    %eax,8(%edi)
-       movl    %eax,12(%edi)
-       movl    %eax,16(%edi)
-       movl    %eax,20(%edi)
-       movl    %eax,24(%edi)
-       movl    %eax,28(%edi)
-       movl    %eax,32(%edi)
-       movl    %eax,36(%edi)
-       movl    %eax,40(%edi)
-       movl    %eax,44(%edi)
-       movl    %eax,48(%edi)
-       movl    %eax,52(%edi)
-       movl    %eax,56(%edi)
-       movl    %eax,60(%edi)
-       addl    $64,%edi
-       decl    %ecx
-       jnz     1b
-
-8:     movl    %edx,%ecx               /* zero by words */
-       shrl    $2,%ecx
-       andl    $3,%edx
-       rep
-       stosl
-
-7:     movl    %edx,%ecx               /* zero remainder bytes */
-       rep
-       stosb
-
-       popl    %edi
-       ret
-
-#if !defined(SMALL_KERNEL)
-ENTRY(sse2_pagezero)
-       pushl   %ebx
-       movl    8(%esp),%ecx
-       movl    %ecx,%eax
-       addl    $4096,%eax
-       xor     %ebx,%ebx
-1:
-       movnti  %ebx,(%ecx)
-       addl    $4,%ecx
-       cmpl    %ecx,%eax
-       jne     1b
-       sfence
-       popl    %ebx
-       ret
-
-ENTRY(i686_pagezero)
-       pushl   %edi
-       pushl   %ebx
-
-       movl    12(%esp), %edi
-       movl    $1024, %ecx
-
-       ALIGN_TEXT
-1:
-       xorl    %eax, %eax
-       repe
-       scasl
-       jnz     2f
-
-       popl    %ebx
-       popl    %edi
-       ret
-
-       ALIGN_TEXT
-
-2:
-       incl    %ecx
-       subl    $4, %edi
-
-       movl    %ecx, %edx
-       cmpl    $16, %ecx
-
-       jge     3f
-
-       movl    %edi, %ebx
-       andl    $0x3f, %ebx
-       shrl    %ebx
-       shrl    %ebx
-       movl    $16, %ecx
-       subl    %ebx, %ecx
-
-3:
-       subl    %ecx, %edx
-       rep
-       stosl
-
-       movl    %edx, %ecx
-       testl   %edx, %edx
-       jnz     1b
-
-       popl    %ebx
-       popl    %edi
-       ret
-#endif
-
-/*
- * int cpu_paenable(void *);
- */
-ENTRY(cpu_paenable)
-       movl    $-1, %eax
-       testl   $CPUID_PAE, _C_LABEL(cpu_feature)
-       jz      1f
-
-       pushl   %esi
-       pushl   %edi
-       movl    12(%esp), %esi
-       movl    %cr3, %edi
-       orl     $0xfe0, %edi    /* PDPT will be in the last four slots! */
-       movl    %edi, %cr3
-       addl    $KERNBASE, %edi /* and make it back virtual again */
-       movl    $8, %ecx
-       rep
-       movsl
-
-       movl    $MSR_EFER, %ecx
-       rdmsr
-       orl     $EFER_NXE, %eax
-       wrmsr
-
-       movl    %cr4, %eax
-       orl     $CR4_PAE, %eax
-       movl    %eax, %cr4      /* BANG!!! */
-
-       movl    12(%esp), %eax
-       subl    $KERNBASE, %eax
-       movl    %eax, %cr3      /* reload real PDPT */
-       movl    $4*NBPG, %eax
-       movl    %eax, _C_LABEL(PTDsize)
-
-       xorl    %eax, %eax
-       popl    %edi
-       popl    %esi
-1:
-       ret
-
-#if NLAPIC > 0
-#include <i386/i386/apicvec.s>
-#endif
-
-#include <i386/i386/mutex.S>
-
-.globl _C_LABEL(_stac)
-_C_LABEL(_stac):
-       stac
-
-.globl _C_LABEL(_clac)
-_C_LABEL(_clac):
-       clac
Index: arch/i386/i386/locore2.S
===================================================================
RCS file: arch/i386/i386/locore2.S
diff -N arch/i386/i386/locore2.S
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ arch/i386/i386/locore2.S    30 May 2017 07:52:22 -0000
@@ -0,0 +1,1346 @@
+/*     $OpenBSD: locore.s,v 1.173 2017/05/12 08:46:28 mpi Exp $        */
+/*     $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $    */
+
+/*-
+ * Copyright (c) 1993, 1994, 1995 Charles M. Hannum.  All rights reserved.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)locore.s    7.3 (Berkeley) 5/13/91
+ */
+
+#include "npx.h"
+#include "assym.h"
+#include "apm.h"
+#include "lapic.h"
+#include "ksyms.h"
+
+#include <sys/errno.h>
+#include <sys/syscall.h>
+
+#include <machine/codepatch.h>
+#include <machine/cputypes.h>
+#include <machine/param.h>
+#include <machine/pte.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
+#include <machine/trap.h>
+
+#include <dev/isa/isareg.h>
+
+#if NLAPIC > 0
+#include <machine/i82489reg.h>
+#endif
+
+#ifndef SMALL_KERNEL
+/*
+ * As stac/clac SMAP instructions are 3 bytes, we want the fastest
+ * 3 byte nop sequence possible here.  This will be replaced by
+ * stac/clac instructions if SMAP is detected after booting.
+ *
+ * Intel documents multi-byte NOP sequences as being available
+ * on all family 0x6 and 0xf processors (ie 686+)
+ * So use 3 of the single byte nops for compatibility
+ */
+#define SMAP_NOP       .byte 0x90, 0x90, 0x90
+#define SMAP_STAC      CODEPATCH_START                 ;\
+                       SMAP_NOP                        ;\
+                       CODEPATCH_END(CPTAG_STAC)
+#define SMAP_CLAC      CODEPATCH_START                 ;\
+                       SMAP_NOP                        ;\
+                       CODEPATCH_END(CPTAG_CLAC)
+
+#else
+
+#define SMAP_STAC
+#define SMAP_CLAC
+
+#endif
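+
+/*
+ * A rough C sketch of the boot-time patching these macros set up
+ * (helper name and flag are illustrative, not necessarily the real
+ * codepatch API; stac encodes as 0f 01 cb, clac as 0f 01 ca):
+ *
+ *	static const uint8_t stac_insn[3] = { 0x0f, 0x01, 0xcb };
+ *	static const uint8_t clac_insn[3] = { 0x0f, 0x01, 0xca };
+ *
+ *	if (cpu_has_smap) {
+ *		codepatch_replace(CPTAG_STAC, stac_insn, sizeof(stac_insn));
+ *		codepatch_replace(CPTAG_CLAC, clac_insn, sizeof(clac_insn));
+ *	}
+ */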
+
+
+/*
+ * override user-land alignment before including asm.h
+ */
+
+#define        ALIGN_DATA      .align  4
+#define        ALIGN_TEXT      .align  4,0x90  /* 4-byte boundaries, NOP-filled */
+#define        SUPERALIGN_TEXT .align  16,0x90 /* 16-byte boundaries better for 486 */
+#define _ALIGN_TEXT    ALIGN_TEXT
+#include <machine/asm.h>
+
+#define CPL _C_LABEL(lapic_tpr)
+
+#define        GET_CURPCB(reg)                                 \
+       movl    CPUVAR(CURPCB), reg
+
+#define        CHECK_ASTPENDING(treg)                          \
+       movl    CPUVAR(CURPROC),treg            ;       \
+       cmpl    $0, treg                        ;       \
+       je      1f                              ;       \
+       cmpl    $0,P_MD_ASTPENDING(treg)        ;       \
+       1:
+
+#define        CLEAR_ASTPENDING(cpreg)                         \
+       movl    $0,P_MD_ASTPENDING(cpreg)
+
+/*
+ * These are used on interrupt or trap entry or exit.
+ */
+#define        INTRENTRY \
+       cld                     ; \
+       pushl   %eax            ; \
+       pushl   %ecx            ; \
+       pushl   %edx            ; \
+       pushl   %ebx            ; \
+       pushl   %ebp            ; \
+       pushl   %esi            ; \
+       pushl   %edi            ; \
+       pushl   %ds             ; \
+       pushl   %es             ; \
+       pushl   %gs             ; \
+       movl    $GSEL(GDATA_SEL, SEL_KPL),%eax  ; \
+       movw    %ax,%ds         ; \
+       movw    %ax,%es         ; \
+       xorl    %eax,%eax       ; /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */ \
+       movw    %ax,%gs         ; \
+       pushl   %fs             ; \
+       movl    $GSEL(GCPU_SEL, SEL_KPL),%eax   ; \
+       movw    %ax,%fs
+
+#define        INTR_RESTORE_ALL \
+       popl    %fs             ; \
+       popl    %gs             ; \
+       popl    %es             ; \
+       popl    %ds             ; \
+       popl    %edi            ; \
+       popl    %esi            ; \
+       popl    %ebp            ; \
+       popl    %ebx            ; \
+       popl    %edx            ; \
+       popl    %ecx            ; \
+       popl    %eax
+
+#define        INTRFASTEXIT \
+       INTR_RESTORE_ALL        ;\
+       addl    $8,%esp         ; \
+       iret
+
+#define        INTR_FAKE_TRAP  0xbadabada
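+
+/*
+ * For orientation: the frame INTRENTRY builds mirrors the i386 struct
+ * trapframe; a sketch reconstructed from the push order above
+ * (machine/frame.h is authoritative):
+ *
+ *	struct trapframe {
+ *		int	tf_fs, tf_gs, tf_es, tf_ds;
+ *		int	tf_edi, tf_esi, tf_ebp, tf_ebx;
+ *		int	tf_edx, tf_ecx, tf_eax;
+ *		int	tf_trapno, tf_err;
+ *		int	tf_eip, tf_cs, tf_eflags;
+ *		int	tf_esp, tf_ss;	/* pushed by CPU on ring crossing only */
+ *	};
+ */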
+
+/*
+ * PTmap is recursive pagemap at top of virtual address space.
+ * Within PTmap, the page directory can be found (third indirection).
+ */
+       .globl  _C_LABEL(PTmap), _C_LABEL(PTD), _C_LABEL(PTDpde)
+       .set    _C_LABEL(PTmap), (PDSLOT_PTE << PDSHIFT)
+       .set    _C_LABEL(PTD), (_C_LABEL(PTmap) + PDSLOT_PTE * NBPG)
+       .set    _C_LABEL(PTDpde), (_C_LABEL(PTD) + PDSLOT_PTE * 4)      # XXX 4 == sizeof pde
+
+/*
+ * APTmap, APTD is the alternate recursive pagemap.
+ * It's used when modifying another process's page tables.
+ */
+       .globl  _C_LABEL(APTmap), _C_LABEL(APTD), _C_LABEL(APTDpde)
+       .set    _C_LABEL(APTmap), (PDSLOT_APTE << PDSHIFT)
+       .set    _C_LABEL(APTD), (_C_LABEL(APTmap) + PDSLOT_APTE * NBPG)
+       # XXX 4 == sizeof pde
+       .set    _C_LABEL(APTDpde), (_C_LABEL(PTD) + PDSLOT_APTE * 4)
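+
+/*
+ * Worked example of the recursive mapping (a sketch assuming non-PAE
+ * 4-byte PTEs and 4KB pages; va_to_pte() is an illustrative name):
+ *
+ *	pt_entry_t *
+ *	va_to_pte(vaddr_t va)
+ *	{
+ *		/* PTmap acts as one big PTE array indexed by VA page */
+ *		return (pt_entry_t *)PTmap + (va >> PGSHIFT);
+ *	}
+ *
+ * PTD itself appears within PTmap at slot PDSLOT_PTE, which is how the
+ * .set lines above derive PTD and PTDpde from PTmap.
+ */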
+
+
+       .data
+
+       .globl  _C_LABEL(cpu), _C_LABEL(cpu_id), _C_LABEL(cpu_vendor)
+       .globl  _C_LABEL(cpu_brandstr)
+       .globl  _C_LABEL(cpuid_level)
+       .globl  _C_LABEL(cpu_miscinfo)
+       .globl  _C_LABEL(cpu_feature), _C_LABEL(cpu_ecxfeature)
+       .globl  _C_LABEL(ecpu_feature), _C_LABEL(ecpu_eaxfeature)
+       .globl  _C_LABEL(ecpu_ecxfeature)
+       .globl  _C_LABEL(cpu_cache_eax), _C_LABEL(cpu_cache_ebx)
+       .globl  _C_LABEL(cpu_cache_ecx), _C_LABEL(cpu_cache_edx)
+       .globl  _C_LABEL(cpu_perf_eax)
+       .globl  _C_LABEL(cpu_perf_ebx)
+       .globl  _C_LABEL(cpu_perf_edx)
+       .globl  _C_LABEL(cpu_apmi_edx)
+       .globl  _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem)
+       .globl  _C_LABEL(cpu_pae)
+       .globl  _C_LABEL(esym)
+       .globl  _C_LABEL(ssym)
+       .globl  _C_LABEL(nkptp_max)
+       .globl  _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase)
+       .globl  _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr), _C_LABEL(PTDsize)
+       .globl  _C_LABEL(gdt)
+       .globl  _C_LABEL(bootapiver), _C_LABEL(bootargc), _C_LABEL(bootargv)
+       .globl  _C_LABEL(lapic_tpr)
+
+#if NLAPIC > 0
+       .align NBPG
+       .globl _C_LABEL(local_apic)
+_C_LABEL(local_apic):
+       .space  LAPIC_ID
+       .globl  _C_LABEL(lapic_ppr)
+_C_LABEL(lapic_id):
+       .long   0x00000000
+       .space  LAPIC_TPRI-(LAPIC_ID+4)
+_C_LABEL(lapic_tpr):
+       .space  LAPIC_PPRI-LAPIC_TPRI
+_C_LABEL(lapic_ppr):
+       .space  LAPIC_ISR-LAPIC_PPRI
+_C_LABEL(lapic_isr):
+       .space  NBPG-LAPIC_ISR
+#else
+_C_LABEL(lapic_tpr):
+       .long   0
+#endif
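+
+/*
+ * Note: in the NLAPIC case lapic_tpr lands at offset LAPIC_TPRI inside
+ * the page-aligned local_apic block, so once that page is remapped over
+ * the local APIC (done elsewhere at attach time), loads and stores of
+ * CPL touch the hardware task priority register directly; without a
+ * local APIC it is just an ordinary long.
+ */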
+
+_C_LABEL(cpu):         .long   0       # are we 386, 386sx, 486, 586 or 686
+_C_LABEL(cpu_id):      .long   0       # saved from 'cpuid' instruction
+_C_LABEL(cpu_pae):     .long   0       # are we using PAE paging mode?
+_C_LABEL(cpu_miscinfo):        .long   0       # misc info (apic/brand id) from 'cpuid'
+_C_LABEL(cpu_feature): .long   0       # feature flags from 'cpuid' instruction
+_C_LABEL(ecpu_feature): .long  0       # extended feature flags from 'cpuid'
+_C_LABEL(cpu_ecxfeature):.long 0       # ecx feature flags from 'cpuid'
+_C_LABEL(ecpu_eaxfeature): .long 0     # extended eax feature flags
+_C_LABEL(ecpu_ecxfeature): .long 0     # extended ecx feature flags
+_C_LABEL(cpuid_level): .long   -1      # max. lvl accepted by 'cpuid' insn
+_C_LABEL(cpu_cache_eax):.long  0
+_C_LABEL(cpu_cache_ebx):.long  0
+_C_LABEL(cpu_cache_ecx):.long  0
+_C_LABEL(cpu_cache_edx):.long  0
+_C_LABEL(cpu_perf_eax):        .long   0       # arch. perf. mon. flags from 'cpuid'
+_C_LABEL(cpu_perf_ebx):        .long   0       # arch. perf. mon. flags from 'cpuid'
+_C_LABEL(cpu_perf_edx):        .long   0       # arch. perf. mon. flags from 'cpuid'
+_C_LABEL(cpu_apmi_edx):        .long   0       # adv. power management info. 'cpuid'
+_C_LABEL(cpu_vendor): .space 16        # vendor string returned by 'cpuid' instruction
+_C_LABEL(cpu_brandstr):        .space 48 # brand string returned by 'cpuid'
+_C_LABEL(cold):                .long   1       # cold till we are not
+_C_LABEL(ssym):                .long   0       # ptr to start of syms
+_C_LABEL(esym):                .long   0       # ptr to end of syms
+_C_LABEL(cnvmem):      .long   0       # conventional memory size
+_C_LABEL(extmem):      .long   0       # extended memory size
+_C_LABEL(atdevbase):   .long   0       # location of start of iomem in virtual
+_C_LABEL(bootapiver):  .long   0       # /boot API version
+_C_LABEL(bootargc):    .long   0       # /boot argc
+_C_LABEL(bootargv):    .long   0       # /boot argv
+_C_LABEL(bootdev):     .long   0       # device we booted from
+_C_LABEL(proc0paddr):  .long   0
+_C_LABEL(PTDpaddr):    .long   0       # paddr of PTD, for libkvm
+_C_LABEL(PTDsize):     .long   NBPG    # size of PTD, for libkvm
+
+       .text
+
+NENTRY(proc_trampoline)
+#ifdef MULTIPROCESSOR
+       call    _C_LABEL(proc_trampoline_mp)
+#endif
+       movl    $IPL_NONE,CPL
+       pushl   %ebx
+       call    *%esi
+       addl    $4,%esp
+       INTRFASTEXIT
+       /* NOTREACHED */
+
+       /* This must come before any use of the CODEPATCH macros */
+       .section .codepatch,"a"
+       .align  8
+       .globl _C_LABEL(codepatch_begin)
+_C_LABEL(codepatch_begin):
+       .previous
+
+       .section .codepatchend,"a"
+       .globl _C_LABEL(codepatch_end)
+_C_LABEL(codepatch_end):
+       .previous
+
+/*****************************************************************************/
+
+/*
+ * Signal trampoline; copied to top of user stack.
+ */
+       .section .rodata
+       .globl  _C_LABEL(sigcode)
+_C_LABEL(sigcode):
+       call    *SIGF_HANDLER(%esp)
+       leal    SIGF_SC(%esp),%eax      # scp (the call may have clobbered the
+                                       # copy at SIGF_SCP(%esp))
+       pushl   %eax
+       pushl   %eax                    # junk to fake return address
+       movl    $SYS_sigreturn,%eax
+       int     $0x80                   # enter kernel with args on stack
+       .globl  _C_LABEL(sigcoderet)
+_C_LABEL(sigcoderet):
+       movl    $SYS_exit,%eax
+       int     $0x80                   # exit if sigreturn fails
+       .globl  _C_LABEL(esigcode)
+_C_LABEL(esigcode):
+
+       .globl  _C_LABEL(sigfill)
+_C_LABEL(sigfill):
+       int3
+_C_LABEL(esigfill):
+
+       .data
+       .globl  _C_LABEL(sigfillsiz)
+_C_LABEL(sigfillsiz):
+       .long   _C_LABEL(esigfill) - _C_LABEL(sigfill)
+
+       .text
+
+/*****************************************************************************/
+
+/*
+ * The following primitives are used to fill and copy regions of memory.
+ */
+
+/* Frame pointer reserve on stack. */
+#ifdef DDB
+#define FPADD 4
+#else
+#define FPADD 0
+#endif
+
+/*
+ * kcopy(caddr_t from, caddr_t to, size_t len);
+ * Copy len bytes, abort on fault.
+ */
+ENTRY(kcopy)
+#ifdef DDB
+       pushl   %ebp
+       movl    %esp,%ebp
+#endif
+       pushl   %esi
+       pushl   %edi
+       GET_CURPCB(%eax)                # load curpcb into eax and set on-fault
+       pushl   PCB_ONFAULT(%eax)
+       movl    $_C_LABEL(copy_fault), PCB_ONFAULT(%eax)
+
+       movl    16+FPADD(%esp),%esi
+       movl    20+FPADD(%esp),%edi
+       movl    24+FPADD(%esp),%ecx
+       movl    %edi,%eax
+       subl    %esi,%eax
+       cmpl    %ecx,%eax               # overlapping?
+       jb      1f
+       shrl    $2,%ecx                 # nope, copy forward by 32-bit words
+       rep
+       movsl
+       movl    24+FPADD(%esp),%ecx
+       andl    $3,%ecx                 # any bytes left?
+       rep
+       movsb
+
+       GET_CURPCB(%edx)                # XXX save curpcb?
+       popl    PCB_ONFAULT(%edx)
+       popl    %edi
+       popl    %esi
+       xorl    %eax,%eax
+#ifdef DDB
+       leave
+#endif
+       ret
+
+       ALIGN_TEXT
+1:     addl    %ecx,%edi               # copy backward
+       addl    %ecx,%esi
+       std
+       andl    $3,%ecx                 # any fractional bytes?
+       decl    %edi
+       decl    %esi
+       rep
+       movsb
+       movl    24+FPADD(%esp),%ecx     # copy remainder by 32-bit words
+       shrl    $2,%ecx
+       subl    $3,%esi
+       subl    $3,%edi
+       rep
+       movsl
+       cld
+
+       GET_CURPCB(%edx)
+       popl    PCB_ONFAULT(%edx)
+       popl    %edi
+       popl    %esi
+       xorl    %eax,%eax
+#ifdef DDB
+       leave
+#endif
+       ret
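+
+/*
+ * The forward/backward split above is the classic memmove trick; in C
+ * the unsigned comparison reads (a sketch, copy_* names illustrative):
+ *
+ *	if ((u_long)to - (u_long)from < len)
+ *		copy_backward(from, to, len);	/* destructive overlap */
+ *	else
+ *		copy_forward(from, to, len);
+ */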
+       
+/*****************************************************************************/
+
+/*
+ * The following primitives are used to copy data in and out of the user's
+ * address space.
+ */
+
+/*
+ * copyout(caddr_t from, caddr_t to, size_t len);
+ * Copy len bytes into the user's address space.
+ */
+ENTRY(copyout)
+#ifdef DDB
+       pushl   %ebp
+       movl    %esp,%ebp
+#endif
+       pushl   %esi
+       pushl   %edi
+       pushl   $0      
+       
+       movl    16+FPADD(%esp),%esi
+       movl    20+FPADD(%esp),%edi
+       movl    24+FPADD(%esp),%eax
+
+       /*
+        * We check that the end of the destination buffer is not past the end
+        * of the user's address space.  If it's not, then we only need to
+        * check that each page is writable.  The 486 will do this for us; the
+        * 386 will not.  (We assume that pages in user space that are not
+        * writable by the user are not writable by the kernel either.)
+        */
+       movl    %edi,%edx
+       addl    %eax,%edx
+       jc      _C_LABEL(copy_fault)
+       cmpl    $VM_MAXUSER_ADDRESS,%edx
+       ja      _C_LABEL(copy_fault)
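+
+	/*
+	 * A C sketch of the two branches above: jc catches unsigned
+	 * wrap-around of to+len, ja the user-space bound:
+	 *
+	 *	if ((u_long)to + len < (u_long)to ||
+	 *	    (u_long)to + len > VM_MAXUSER_ADDRESS)
+	 *		return (EFAULT);
+	 */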
+
+       GET_CURPCB(%edx)
+       movl    $_C_LABEL(copy_fault),PCB_ONFAULT(%edx)
+       SMAP_STAC
+
+       /* bcopy(%esi, %edi, %eax); */
+       movl    %eax,%ecx
+       shrl    $2,%ecx
+       rep
+       movsl
+       movl    %eax,%ecx
+       andl    $3,%ecx
+       rep
+       movsb
+
+       SMAP_CLAC
+       popl    PCB_ONFAULT(%edx)
+       popl    %edi
+       popl    %esi
+       xorl    %eax,%eax
+#ifdef DDB
+       leave
+#endif
+       ret
+
+/*
+ * copyin(caddr_t from, caddr_t to, size_t len);
+ * Copy len bytes from the user's address space.
+ */
+ENTRY(copyin)
+#ifdef DDB
+       pushl   %ebp
+       movl    %esp,%ebp
+#endif
+       pushl   %esi
+       pushl   %edi
+       GET_CURPCB(%eax)
+       pushl   $0
+       movl    $_C_LABEL(copy_fault),PCB_ONFAULT(%eax)
+       SMAP_STAC
+       
+       movl    16+FPADD(%esp),%esi
+       movl    20+FPADD(%esp),%edi
+       movl    24+FPADD(%esp),%eax
+
+       /*
+        * We check that the end of the destination buffer is not past the end
+        * of the user's address space.  If it's not, then we only need to
+        * check that each page is readable, and the CPU will do that for us.
+        */
+       movl    %esi,%edx
+       addl    %eax,%edx
+       jc      _C_LABEL(copy_fault)
+       cmpl    $VM_MAXUSER_ADDRESS,%edx
+       ja      _C_LABEL(copy_fault)
+
+       /* bcopy(%esi, %edi, %eax); */
+       movl    %eax,%ecx
+       shrl    $2,%ecx
+       rep
+       movsl
+       movb    %al,%cl
+       andb    $3,%cl
+       rep
+       movsb
+
+       SMAP_CLAC
+       GET_CURPCB(%edx)
+       popl    PCB_ONFAULT(%edx)
+       popl    %edi
+       popl    %esi
+       xorl    %eax,%eax
+#ifdef DDB
+       leave
+#endif
+       ret
+
+ENTRY(copy_fault)
+       SMAP_CLAC
+       GET_CURPCB(%edx)
+       popl    PCB_ONFAULT(%edx)
+       popl    %edi
+       popl    %esi
+       movl    $EFAULT,%eax
+#ifdef DDB
+       leave
+#endif
+       ret
+
+/*
+ * copyoutstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
+ * Copy a NUL-terminated string, at most maxlen characters long, into the
+ * user's address space.  Return the number of characters copied (including the
+ * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
+ * return 0 or EFAULT.
+ */
+ENTRY(copyoutstr)
+#ifdef DDB
+       pushl   %ebp
+       movl    %esp,%ebp
+#endif
+       pushl   %esi
+       pushl   %edi
+
+       movl    12+FPADD(%esp),%esi             # esi = from
+       movl    16+FPADD(%esp),%edi             # edi = to
+       movl    20+FPADD(%esp),%edx             # edx = maxlen
+
+5:     GET_CURPCB(%eax)
+       movl    $_C_LABEL(copystr_fault),PCB_ONFAULT(%eax)
+       SMAP_STAC
+       /*
+        * Get min(%edx, VM_MAXUSER_ADDRESS-%edi).
+        */
+       movl    $VM_MAXUSER_ADDRESS,%eax
+       subl    %edi,%eax
+       jbe     _C_LABEL(copystr_fault)         # die if CF == 1 || ZF == 1
+                                               # i.e. make sure that %edi
+                                               # is below VM_MAXUSER_ADDRESS
+
+       cmpl    %edx,%eax
+       jae     1f
+       movl    %eax,%edx
+       movl    %eax,20+FPADD(%esp)
+
+1:     incl    %edx
+
+1:     decl    %edx
+       jz      2f
+       lodsb
+       stosb
+       testb   %al,%al
+       jnz     1b
+
+       /* Success -- 0 byte reached. */
+       decl    %edx
+       xorl    %eax,%eax
+       jmp     copystr_return
+
+2:     /* edx is zero -- return EFAULT or ENAMETOOLONG. */
+       cmpl    $VM_MAXUSER_ADDRESS,%edi
+       jae     _C_LABEL(copystr_fault)
+       movl    $ENAMETOOLONG,%eax
+       jmp     copystr_return
+
+/*
+ * copyinstr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
+ * Copy a NUL-terminated string, at most maxlen characters long, from the
+ * user's address space.  Return the number of characters copied (including the
+ * NUL) in *lencopied.  If the string is too long, return ENAMETOOLONG; else
+ * return 0 or EFAULT.
+ */
+ENTRY(copyinstr)
+#ifdef DDB
+       pushl   %ebp
+       movl    %esp,%ebp
+#endif
+       pushl   %esi
+       pushl   %edi
+       GET_CURPCB(%ecx)
+       movl    $_C_LABEL(copystr_fault),PCB_ONFAULT(%ecx)
+       SMAP_STAC
+
+       movl    12+FPADD(%esp),%esi             # %esi = from
+       movl    16+FPADD(%esp),%edi             # %edi = to
+       movl    20+FPADD(%esp),%edx             # %edx = maxlen
+
+       /*
+        * Get min(%edx, VM_MAXUSER_ADDRESS-%esi).
+        */
+       movl    $VM_MAXUSER_ADDRESS,%eax
+       subl    %esi,%eax
+       jbe     _C_LABEL(copystr_fault)         # Error if CF == 1 || ZF == 1
+                                               # i.e. make sure that %esi
+                                               # is below VM_MAXUSER_ADDRESS
+       cmpl    %edx,%eax
+       jae     1f
+       movl    %eax,%edx
+       movl    %eax,20+FPADD(%esp)
+
+1:     incl    %edx
+
+1:     decl    %edx
+       jz      2f
+       lodsb
+       stosb
+       testb   %al,%al
+       jnz     1b
+
+       /* Success -- 0 byte reached. */
+       decl    %edx
+       xorl    %eax,%eax
+       jmp     copystr_return
+
+2:     /* edx is zero -- return EFAULT or ENAMETOOLONG. */
+       cmpl    $VM_MAXUSER_ADDRESS,%esi
+       jae     _C_LABEL(copystr_fault)
+       movl    $ENAMETOOLONG,%eax
+       jmp     copystr_return
+
+ENTRY(copystr_fault)
+       movl    $EFAULT,%eax
+
+copystr_return:
+       SMAP_CLAC
+       /* Set *lencopied and return %eax. */
+       GET_CURPCB(%ecx)
+       movl    $0,PCB_ONFAULT(%ecx)
+       movl    20+FPADD(%esp),%ecx
+       subl    %edx,%ecx
+       movl    24+FPADD(%esp),%edx
+       testl   %edx,%edx
+       jz      8f
+       movl    %ecx,(%edx)
+
+8:     popl    %edi
+       popl    %esi
+#ifdef DDB
+       leave
+#endif
+       ret
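+
+/*
+ * Typical caller-side usage of the string copies above (a sketch;
+ * user_path stands in for a user-space pointer):
+ *
+ *	char path[MAXPATHLEN];
+ *	size_t len;
+ *	int error;
+ *
+ *	error = copyinstr(user_path, path, sizeof(path), &len);
+ *	if (error)
+ *		return (error);	/* EFAULT or ENAMETOOLONG */
+ *	/* len includes the terminating NUL */
+ */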
+
+/*
+ * copystr(caddr_t from, caddr_t to, size_t maxlen, size_t *lencopied);
+ * Copy a NUL-terminated string, at most maxlen characters long.  Return the
+ * number of characters copied (including the NUL) in *lencopied.  If the
+ * string is too long, return ENAMETOOLONG; else return 0.
+ */
+ENTRY(copystr)
+#ifdef DDB
+       pushl   %ebp
+       movl    %esp,%ebp
+#endif
+       pushl   %esi
+       pushl   %edi
+
+       movl    12+FPADD(%esp),%esi             # esi = from
+       movl    16+FPADD(%esp),%edi             # edi = to
+       movl    20+FPADD(%esp),%edx             # edx = maxlen
+       incl    %edx
+
+1:     decl    %edx
+       jz      4f
+       lodsb
+       stosb
+       testb   %al,%al
+       jnz     1b
+
+       /* Success -- 0 byte reached. */
+       decl    %edx
+       xorl    %eax,%eax
+       jmp     6f
+
+4:     /* edx is zero -- return ENAMETOOLONG. */
+       movl    $ENAMETOOLONG,%eax
+
+6:     /* Set *lencopied and return %eax. */
+       movl    20+FPADD(%esp),%ecx
+       subl    %edx,%ecx
+       movl    24+FPADD(%esp),%edx
+       testl   %edx,%edx
+       jz      7f
+       movl    %ecx,(%edx)
+
+7:     popl    %edi
+       popl    %esi
+#ifdef DDB
+       leave
+#endif
+       ret
+
+/*****************************************************************************/
+
+/*
+ * The following is i386-specific nonsense.
+ */
+
+/*
+ * void lgdt(struct region_descriptor *rdp);
+ * Change the global descriptor table.
+ */
+NENTRY(lgdt)
+       /* Reload the descriptor table. */
+       movl    4(%esp),%eax
+       lgdt    (%eax)
+       /* Flush the prefetch q. */
+       jmp     1f
+       nop
+1:     /* Reload "stale" selectors. */
+       movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
+       movw    %ax,%ds
+       movw    %ax,%es
+       movw    %ax,%ss
+       movl    $GSEL(GCPU_SEL, SEL_KPL),%eax
+       movw    %ax,%fs
+       /* Reload code selector by doing intersegment return. */
+       popl    %eax
+       pushl   $GSEL(GCODE_SEL, SEL_KPL)
+       pushl   %eax
+       lret
+
+ENTRY(setjmp)
+       movl    4(%esp),%eax
+       movl    %ebx,(%eax)             # save ebx
+       movl    %esp,4(%eax)            # save esp
+       movl    %ebp,8(%eax)            # save ebp
+       movl    %esi,12(%eax)           # save esi
+       movl    %edi,16(%eax)           # save edi
+       movl    (%esp),%edx             # get rta
+       movl    %edx,20(%eax)           # save eip
+       xorl    %eax,%eax               # return (0);
+       ret
+
+ENTRY(longjmp)
+       movl    4(%esp),%eax
+       movl    (%eax),%ebx             # restore ebx
+       movl    4(%eax),%esp            # restore esp
+       movl    8(%eax),%ebp            # restore ebp
+       movl    12(%eax),%esi           # restore esi
+       movl    16(%eax),%edi           # restore edi
+       movl    20(%eax),%edx           # get rta
+       movl    %edx,(%esp)             # put in return frame
+       xorl    %eax,%eax               # return (1);
+       incl    %eax
+       ret
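+
+/*
+ * These are the in-kernel setjmp/longjmp, used e.g. by ddb for fault
+ * recovery; a usage sketch (jbuf and risky_access are illustrative):
+ *
+ *	label_t jbuf;
+ *
+ *	if (setjmp(&jbuf) == 0)
+ *		risky_access();		/* may longjmp(&jbuf) */
+ *	else
+ *		printf("recovered\n");
+ */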
+
+/*****************************************************************************/
+               
+/*
+ * cpu_switchto(struct proc *old, struct proc *new)
+ * Switch from the "old" proc to the "new" proc. If "old" is NULL, we
+ * don't need to bother saving old context.
+ */
+ENTRY(cpu_switchto)
+       pushl   %ebx
+       pushl   %esi
+       pushl   %edi
+
+       movl    16(%esp), %esi
+       movl    20(%esp), %edi
+
+       /* If old process exited, don't bother. */
+       testl   %esi,%esi
+       jz      switch_exited
+
+       /* Save old stack pointers. */
+       movl    P_ADDR(%esi),%ebx
+       movl    %esp,PCB_ESP(%ebx)
+       movl    %ebp,PCB_EBP(%ebx)
+
+switch_exited:
+       /* Restore saved context. */
+
+       /* No interrupts while loading new state. */
+       cli
+
+       /* Record new process. */
+       movl    %edi, CPUVAR(CURPROC)
+       movb    $SONPROC, P_STAT(%edi)
+
+       /* Restore stack pointers. */
+       movl    P_ADDR(%edi),%ebx
+       movl    PCB_ESP(%ebx),%esp
+       movl    PCB_EBP(%ebx),%ebp
+
+       /* Record new pcb. */
+       movl    %ebx, CPUVAR(CURPCB)
+
+       /*
+        * Activate the address space.  The pcb copy of %cr3 will
+        * be refreshed from the pmap, and because we're
+        * curproc they'll both be reloaded into the CPU.
+        */
+       pushl   %edi
+       pushl   %esi
+       call    _C_LABEL(pmap_switch)
+       addl    $8,%esp
+
+       /* Load TSS info. */
+       movl    CPUVAR(GDT),%eax
+       movl    P_MD_TSS_SEL(%edi),%edx
+
+       /* Switch TSS. */
+       andl    $~0x0200,4-SEL_KPL(%eax,%edx,1)
+       ltr     %dx
+
+       /* Restore cr0 (including FPU state). */
+       movl    PCB_CR0(%ebx),%ecx
+#ifdef MULTIPROCESSOR
+       /*
+        * If our floating point registers are on a different CPU,
+        * clear CR0_TS so we'll trap rather than reuse bogus state.
+        */
+       movl    CPUVAR(SELF), %esi
+       cmpl    PCB_FPCPU(%ebx), %esi
+       jz      1f
+       orl     $CR0_TS,%ecx
+1:     
+#endif 
+       movl    %ecx,%cr0
+
+       /* Interrupts are okay again. */
+       sti
+
+       popl    %edi
+       popl    %esi
+       popl    %ebx
+       ret
+
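A C-like reading sketch of the switch path above; apart from
pmap_switch() and the proc/PCB field names, the helpers are illustrative
shorthand for the asm steps:

	/* Reading sketch of cpu_switchto(); helper names are made up. */
	void
	cpu_switchto_sketch(struct proc *old, struct proc *new)
	{
		if (old != NULL)
			save_stack_ptrs(&old->p_addr->u_pcb); /* PCB_ESP/EBP */

		disable_intr();				/* cli */
		curcpu()->ci_curproc = new;		/* CPUVAR(CURPROC) */
		new->p_stat = SONPROC;
		load_stack_ptrs(&new->p_addr->u_pcb);	/* now on new stack */
		curcpu()->ci_curpcb = &new->p_addr->u_pcb;
		pmap_switch(old, new);			/* refresh %cr3 */
		switch_tss(new);			/* clear busy bit, ltr */
		load_cr0_from_pcb(new);			/* may set CR0_TS (MP) */
		enable_intr();				/* sti */
	}
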
+ENTRY(cpu_idle_enter)
+       movl    _C_LABEL(cpu_idle_enter_fcn),%eax
+       cmpl    $0,%eax
+       je      1f
+       jmpl    *%eax
+1:
+       ret
+
+ENTRY(cpu_idle_cycle)
+       movl    _C_LABEL(cpu_idle_cycle_fcn),%eax
+       cmpl    $0,%eax
+       je      1f
+       call    *%eax
+       ret
+1:
+       sti
+       hlt
+       ret
+
+ENTRY(cpu_idle_leave)
+       movl    _C_LABEL(cpu_idle_leave_fcn),%eax
+       cmpl    $0,%eax
+       je      1f
+       jmpl    *%eax
+1:
+       ret
+
+/*
+ * savectx(struct pcb *pcb);
+ * Update pcb, saving current processor state.
+ */
+ENTRY(savectx)
+       movl    4(%esp),%edx            # edx = p->p_addr
+
+       /* Save stack pointers. */
+       movl    %esp,PCB_ESP(%edx)
+       movl    %ebp,PCB_EBP(%edx)
+
+       movl    PCB_FLAGS(%edx),%ecx
+       orl     $PCB_SAVECTX,%ecx
+       movl    %ecx,PCB_FLAGS(%edx)
+
+       ret
+
+/*****************************************************************************/
+
+/*
+ * Trap and fault vector routines
+ *
+ * On exit from the kernel to user mode, we always need to check for ASTs.  In
+ * addition, we need to do this atomically; otherwise an interrupt may occur
+ * which causes an AST, but it won't get processed until the next kernel entry
+ * (possibly the next clock tick).  Thus, we disable interrupts before checking,
+ * and only enable them again on the final `iret' or before calling the AST
+ * handler.
+ */
+#define        IDTVEC(name)    ALIGN_TEXT; .globl X##name; X##name:
+
+#define        TRAP(a)         pushl $(a) ; jmp _C_LABEL(alltraps)
+#define        ZTRAP(a)        pushl $0 ; TRAP(a)
+
+
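In C-like terms the exit protocol described above is a small loop;
check_astpending(), clear_astpending() and returning_to_user() are
stand-ins for the CHECK_ASTPENDING/CLEAR_ASTPENDING macros and the
TF_CS/PSL_VM tests in the code below:

	/* Reading sketch of the AST check done on every return to user. */
	void
	return_to_user_sketch(struct trapframe *frame)
	{
		for (;;) {
			disable_intr();	/* cli: check-and-exit is atomic */
			if (!check_astpending() || !returning_to_user(frame))
				break;	/* iret with interrupts still off */
			clear_astpending();
			enable_intr();	/* sti */
			ast(frame);	/* handler may post another AST */
		}
		/* INTRFASTEXIT: pop registers, iret */
	}
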
+       .text
+IDTVEC(div)
+       ZTRAP(T_DIVIDE)
+IDTVEC(dbg)
+       subl    $4,%esp
+       pushl   %eax
+       movl    %dr6,%eax
+       movl    %eax,4(%esp)
+       andb    $~0xf,%al
+       movl    %eax,%dr6
+       popl    %eax
+       TRAP(T_TRCTRAP)
+IDTVEC(nmi)
+       ZTRAP(T_NMI)
+IDTVEC(bpt)
+       ZTRAP(T_BPTFLT)
+IDTVEC(ofl)
+       ZTRAP(T_OFLOW)
+IDTVEC(bnd)
+       ZTRAP(T_BOUND)
+IDTVEC(ill)
+       ZTRAP(T_PRIVINFLT)
+IDTVEC(dna)
+#if NNPX > 0
+       pushl   $0                      # dummy error code
+       pushl   $T_DNA
+       INTRENTRY
+#ifdef MULTIPROCESSOR
+       pushl   CPUVAR(SELF)
+#else
+       pushl   $_C_LABEL(cpu_info_primary)
+#endif
+       call    *_C_LABEL(npxdna_func)
+       addl    $4,%esp
+       testl   %eax,%eax
+       jz      calltrap
+       INTRFASTEXIT
+#else
+       ZTRAP(T_DNA)
+#endif
+IDTVEC(dble)
+       TRAP(T_DOUBLEFLT)
+IDTVEC(fpusegm)
+       ZTRAP(T_FPOPFLT)
+IDTVEC(tss)
+       TRAP(T_TSSFLT)
+IDTVEC(missing)
+       TRAP(T_SEGNPFLT)
+IDTVEC(stk)
+       TRAP(T_STKFLT)
+IDTVEC(prot)
+       TRAP(T_PROTFLT)
+IDTVEC(f00f_redirect)
+       pushl   $T_PAGEFLT
+       INTRENTRY
+       testb   $PGEX_U,TF_ERR(%esp)
+       jnz     calltrap
+       movl    %cr2,%eax
+       subl    _C_LABEL(idt),%eax
+       cmpl    $(6*8),%eax
+       jne     calltrap
+       movb    $T_PRIVINFLT,TF_TRAPNO(%esp)
+       jmp     calltrap
+IDTVEC(page)
+       TRAP(T_PAGEFLT)
+IDTVEC(rsvd)
+       ZTRAP(T_RESERVED)
+IDTVEC(mchk)
+       ZTRAP(T_MACHK)
+IDTVEC(simd)
+       ZTRAP(T_XFTRAP)
+IDTVEC(intrspurious)
+       /*
+        * The Pentium Pro local APIC may erroneously call this vector for a
+        * default IR7.  Just ignore it.
+        *
+        * (The local APIC does this when CPL is raised while it's on the
+        * way to delivering an interrupt... presumably enough has been set
+        * up that it's inconvenient to abort delivery completely.)
+        */
+       iret
+IDTVEC(fpu)
+#if NNPX > 0
+       /*
+        * Handle like an interrupt so that we can call npxintr to clear the
+        * error.  It would be better to handle npx interrupts as traps but
+        * this is difficult for nested interrupts.
+        */
+       subl    $8,%esp                 /* space for tf_{err,trapno} */
+       INTRENTRY
+       pushl   CPL                     # if_ppl in intrframe
+       pushl   %esp                    # push address of intrframe
+       incl    _C_LABEL(uvmexp)+V_TRAP
+       call    _C_LABEL(npxintr)
+       addl    $8,%esp                 # pop address and if_ppl
+       INTRFASTEXIT
+#else
+       ZTRAP(T_ARITHTRAP)
+#endif
+IDTVEC(align)
+       ZTRAP(T_ALIGNFLT)
+       /* 18 - 31 reserved for future exceptions */
+
+/*
+ * If an error is detected during trap, syscall, or interrupt exit, trap() will
+ * change %eip to point to one of these labels.  We clean up the stack, if
+ * necessary, and resume as if we were handling a general protection fault.
+ * This will cause the process to get a SIGBUS.
+ */
+NENTRY(resume_iret)
+       ZTRAP(T_PROTFLT)
+NENTRY(resume_pop_ds)
+       pushl   %es
+       movl    $GSEL(GDATA_SEL, SEL_KPL),%eax
+       movw    %ax,%es
+NENTRY(resume_pop_es)
+       pushl   %gs
+       xorl    %eax,%eax       /* $GSEL(GNULL_SEL, SEL_KPL) == 0 */
+       movw    %ax,%gs
+NENTRY(resume_pop_gs)
+       pushl   %fs
+       movl    $GSEL(GCPU_SEL, SEL_KPL),%eax
+       movw    %ax,%fs
+NENTRY(resume_pop_fs)
+       movl    $T_PROTFLT,TF_TRAPNO(%esp)
+       sti
+       jmp     calltrap
+
+/*
+ * All traps go through here. Call the generic trap handler, and
+ * check for ASTs afterwards.
+ */
+NENTRY(alltraps)
+       INTRENTRY
+       sti
+calltrap:
+#ifdef DIAGNOSTIC
+       movl    CPL,%ebx
+#endif /* DIAGNOSTIC */
+#if !defined(GPROF) && defined(DDBPROF)
+       cmpl    $T_BPTFLT,TF_TRAPNO(%esp)
+       jne     .Lreal_trap
+
+       pushl   %esp
+       call    _C_LABEL(db_prof_hook)
+       addl    $4,%esp
+       cmpl    $1,%eax
+       jne     .Lreal_trap
+
+       /*
+        * Abuse the error field to indicate that INTRFASTEXIT needs
+        * to emulate the patched instruction.
+        */
+       movl    $INTR_FAKE_TRAP, TF_ERR(%esp)
+       jz      2f
+.Lreal_trap:
+#endif /* !defined(GPROF) && defined(DDBPROF) */
+       pushl   %esp
+       call    _C_LABEL(trap)
+       addl    $4,%esp
+2:     /* Check for ASTs on exit to user mode. */
+       cli
+       CHECK_ASTPENDING(%ecx)
+       je      1f
+       testb   $SEL_RPL,TF_CS(%esp)
+#ifdef VM86
+       jnz     5f
+       testl   $PSL_VM,TF_EFLAGS(%esp)
+#endif
+       jz      1f
+5:     CLEAR_ASTPENDING(%ecx)
+       sti
+       pushl   %esp
+       call    _C_LABEL(ast)
+       addl    $4,%esp
+       jmp     2b
+1:
+#if !defined(GPROF) && defined(DDBPROF)
+       /*
+        * If we are returning from a probe trap we need to fix the
+        * stack layout and emulate the patched instruction.
+        *
+        * The code below does that by trashing %eax, so it MUST be
+        * restored afterward.
+        */
+       cmpl    $INTR_FAKE_TRAP, TF_ERR(%esp)
+       je      .Lprobe_fixup
+#endif /* !defined(GPROF) && defined(DDBPROF) */
+#ifndef DIAGNOSTIC
+       INTRFASTEXIT
+#else
+       cmpl    CPL,%ebx
+       jne     3f
+       INTRFASTEXIT
+3:     sti
+       pushl   $4f
+       call    _C_LABEL(printf)
+       addl    $4,%esp
+#if defined(DDB) && 0
+       int     $3
+#endif /* DDB */
+       movl    %ebx,CPL
+       jmp     2b
+4:     .asciz  "WARNING: SPL NOT LOWERED ON TRAP EXIT\n"
+#endif /* DIAGNOSTIC */
+
+#if !defined(GPROF) && defined(DDBPROF)
+.Lprobe_fixup:
+       /* Restore all registers, unwinding the stack. */
+       INTR_RESTORE_ALL
+
+       /*
+        * Use the space left by ``err'' and ``trapno'' to emulate
+        * "pushl %ebp".
+        *
+        * Temporarily save %eax.
+        */
+       movl    %eax,0(%esp)
+
+       /* Shift hardware-saved registers: eip, cs, eflags */
+       movl    8(%esp),%eax
+       movl    %eax,4(%esp)
+       movl    12(%esp),%eax
+       movl    %eax,8(%esp)
+       movl    16(%esp),%eax
+       movl    %eax,12(%esp)
+
+       /* Store %ebp in the expected location to finish the emulation. */
+       movl    %ebp,16(%esp)
+
+       popl    %eax
+       iret
+#endif /* !defined(GPROF) && defined(DDBPROF) */
+/*
+ * Trap gate entry for syscall
+ */
+IDTVEC(syscall)
+       subl    $8,%esp                 /* space for tf_{err,trapno} */
+       INTRENTRY
+       pushl   %esp
+       call    _C_LABEL(syscall)
+       addl    $4,%esp
+2:     /* Check for ASTs on exit to user mode. */
+       cli
+       CHECK_ASTPENDING(%ecx)
+       je      1f
+       /* Always returning to user mode here. */
+       CLEAR_ASTPENDING(%ecx)
+       sti
+       pushl   %esp
+       call    _C_LABEL(ast)
+       addl    $4,%esp
+       jmp     2b
+1:     INTRFASTEXIT
+
+#include <i386/i386/vector.s>
+#include <i386/isa/icu.s>
+
+/*
+ * bzero (void *b, size_t len)
+ *     write len zero bytes to the string b.
+ */
+
+ENTRY(bzero)
+       pushl   %edi
+       movl    8(%esp),%edi
+       movl    12(%esp),%edx
+
+       xorl    %eax,%eax               /* set fill data to 0 */
+
+       /*
+        * if the string is too short, it's really not worth the overhead
+        * of aligning to word boundaries, etc.  So we jump to a plain
+        * unaligned set.
+        */
+       cmpl    $16,%edx
+       jb      7f
+
+       movl    %edi,%ecx               /* compute misalignment */
+       negl    %ecx
+       andl    $3,%ecx
+       subl    %ecx,%edx
+       rep                             /* zero until word aligned */
+       stosb
+
+       cmpl    $CPUCLASS_486,_C_LABEL(cpu_class)
+       jne     8f
+
+       movl    %edx,%ecx
+       shrl    $6,%ecx
+       jz      8f
+       andl    $63,%edx
+1:     movl    %eax,(%edi)
+       movl    %eax,4(%edi)
+       movl    %eax,8(%edi)
+       movl    %eax,12(%edi)
+       movl    %eax,16(%edi)
+       movl    %eax,20(%edi)
+       movl    %eax,24(%edi)
+       movl    %eax,28(%edi)
+       movl    %eax,32(%edi)
+       movl    %eax,36(%edi)
+       movl    %eax,40(%edi)
+       movl    %eax,44(%edi)
+       movl    %eax,48(%edi)
+       movl    %eax,52(%edi)
+       movl    %eax,56(%edi)
+       movl    %eax,60(%edi)
+       addl    $64,%edi
+       decl    %ecx
+       jnz     1b
+
+8:     movl    %edx,%ecx               /* zero by words */
+       shrl    $2,%ecx
+       andl    $3,%edx
+       rep
+       stosl
+
+7:     movl    %edx,%ecx               /* zero remainder bytes */
+       rep
+       stosb
+
+       popl    %edi
+       ret
+
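The alignment strategy of the asm above reads like this in C (a sketch,
not the kernel's implementation; the 486-specific 64-byte unrolled loop
is omitted):

	#include <stddef.h>
	#include <stdint.h>

	void
	bzero_sketch(void *b, size_t len)
	{
		unsigned char *p = b;

		if (len >= 16) {		/* short runs: skip alignment */
			while ((uintptr_t)p & 3) { /* byte-fill to boundary */
				*p++ = 0;
				len--;
			}
			for (; len >= 4; len -= 4, p += 4)
				*(uint32_t *)p = 0; /* words: rep stosl */
		}
		while (len-- > 0)		/* remainder: rep stosb */
			*p++ = 0;
	}
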
+#if !defined(SMALL_KERNEL)
+ENTRY(sse2_pagezero)
+       pushl   %ebx
+       movl    8(%esp),%ecx
+       movl    %ecx,%eax
+       addl    $4096,%eax
+       xor     %ebx,%ebx
+1:
+       movnti  %ebx,(%ecx)
+       addl    $4,%ecx
+       cmpl    %ecx,%eax
+       jne     1b
+       sfence
+       popl    %ebx
+       ret
+
+ENTRY(i686_pagezero)
+       pushl   %edi
+       pushl   %ebx
+
+       movl    12(%esp), %edi
+       movl    $1024, %ecx
+
+       ALIGN_TEXT
+1:
+       xorl    %eax, %eax
+       repe
+       scasl
+       jnz     2f
+
+       popl    %ebx
+       popl    %edi
+       ret
+
+       ALIGN_TEXT
+
+2:
+       incl    %ecx
+       subl    $4, %edi
+
+       movl    %ecx, %edx
+       cmpl    $16, %ecx
+
+       jge     3f
+
+       movl    %edi, %ebx
+       andl    $0x3f, %ebx
+       shrl    %ebx
+       shrl    %ebx
+       movl    $16, %ecx
+       subl    %ebx, %ecx
+
+3:
+       subl    %ecx, %edx
+       rep
+       stosl
+
+       movl    %edx, %ecx
+       testl   %edx, %edx
+       jnz     1b
+
+       popl    %ebx
+       popl    %edi
+       ret
+#endif
+
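A gloss on the two page-zeroing variants above: sse2_pagezero uses
movnti so the zeroes go out as non-temporal stores that bypass the
cache, while i686_pagezero scans with repe scasl and only rewrites words
that are not already zero, leaving clean cache lines untouched.  A
simplified C rendering of the latter idea (the asm batches runs of
non-zero words rather than going word by word):

	#include <stddef.h>
	#include <stdint.h>

	void
	pagezero_sketch(uint32_t *page)	/* 4096-byte, word-aligned page */
	{
		size_t i;

		for (i = 0; i < 4096 / sizeof(*page); i++)
			if (page[i] != 0)
				page[i] = 0;
	}
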
+/*
+ * int cpu_paenable(void *);
+ */
+ENTRY(cpu_paenable)
+       movl    $-1, %eax
+       testl   $CPUID_PAE, _C_LABEL(cpu_feature)
+       jz      1f
+
+       pushl   %esi
+       pushl   %edi
+       movl    12(%esp), %esi
+       movl    %cr3, %edi
+       orl     $0xfe0, %edi    /* PDPT will be in the last four slots! */
+       movl    %edi, %cr3
+       addl    $KERNBASE, %edi /* and make it back virtual again */
+       movl    $8, %ecx
+       rep
+       movsl
+
+       movl    $MSR_EFER, %ecx
+       rdmsr
+       orl     $EFER_NXE, %eax
+       wrmsr
+
+       movl    %cr4, %eax
+       orl     $CR4_PAE, %eax
+       movl    %eax, %cr4      /* BANG!!! */
+
+       movl    12(%esp), %eax
+       subl    $KERNBASE, %eax
+       movl    %eax, %cr3      /* reload real PDPT */
+       movl    $4*NBPG, %eax
+       movl    %eax, _C_LABEL(PTDsize)
+
+       xorl    %eax, %eax
+       popl    %edi
+       popl    %esi
+1:
+       ret
+
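cpu_paenable() is dense; in outline it copies the four new PDPT entries
(eight dwords) into the last 32 bytes of the current page-directory
page, sets EFER_NXE so the NX bit becomes meaningful, flips CR4_PAE, and
finally points %cr3 at the real PDPT and records the grown page-table
size.  A C-like reading sketch; copy_pdpt_into_current_pagedir() is an
illustrative stand-in for the rep movsl sequence:

	/* Reading sketch of cpu_paenable(); the copy helper is made up. */
	int
	cpu_paenable_sketch(void *pdpt_va)
	{
		if ((cpu_feature & CPUID_PAE) == 0)
			return (-1);

		copy_pdpt_into_current_pagedir(pdpt_va); /* 8 dwords */
		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
		lcr4(rcr4() | CR4_PAE);		 /* "BANG": PAE is now live */
		lcr3((u_int)pdpt_va - KERNBASE); /* load the real PDPT */
		PTDsize = 4 * NBPG;		 /* page dir is now 4 pages */
		return (0);
	}
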
+#if NLAPIC > 0
+#include <i386/i386/apicvec.s>
+#endif
+
+#include <i386/i386/mutex.S>
+
+.globl _C_LABEL(_stac)
+_C_LABEL(_stac):
+       stac
+
+.globl _C_LABEL(_clac)
+_C_LABEL(_clac):
+       clac
Index: conf/makegap.sh
===================================================================
RCS file: conf/makegap.sh
diff -N conf/makegap.sh
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ conf/makegap.sh     30 May 2017 07:33:02 -0000
@@ -0,0 +1,28 @@
+#!/bin/sh -
+
+cat << __EOF__
+#include <machine/asm.h>
+#include <machine/param.h>
+
+       .text
+       .space  $RANDOM*3, 0xcc
+       .align  PAGE_SIZE, 0xcc
+
+       .globl  endboot
+_C_LABEL(endboot):
+       .space  PAGE_SIZE + $RANDOM % PAGE_SIZE,  0xcc
+       .align  16, 0xcc
+
+       /*
+        * Randomly bias future data, bss, and rodata objects;
+        * this does not help for objects in locore.S, though.
+        */
+       .data
+       .space  $RANDOM % PAGE_SIZE
+
+       .bss
+       .space  $RANDOM % PAGE_SIZE
+
+       .section .rodata
+       .space  $RANDOM % PAGE_SIZE
+__EOF__
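
Since $RANDOM in sh/ksh yields values in 0..32767, the gaps the script
emits are bounded; a small C program computing the ranges, assuming
PAGE_SIZE is 4096 as on i386/amd64:

	#include <stdio.h>

	int
	main(void)
	{
		const int pagesize = 4096, randmax = 32767;

		printf("head gap: 0..%d bytes, then page-aligned\n",
		    randmax * 3);
		printf("endboot pad: %d..%d bytes\n", pagesize,
		    pagesize + pagesize - 1);
		printf("data/bss/rodata pads: 0..%d bytes each\n",
		    pagesize - 1);
		return (0);
	}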
