Module Name: src Committed By: reinoud Date: Sat Jan 14 17:42:52 UTC 2012
Modified Files: src/sys/arch/usermode/dev: cpu.c src/sys/arch/usermode/include: pcb.h src/sys/arch/usermode/target/i386: cpu_i386.c src/sys/arch/usermode/target/x86_64: cpu_x86_64.c src/sys/arch/usermode/usermode: copy.c pmap.c syscall.c thunk.c trap.c Log Message: Revamp the NetBSD/usermode pagefault and illegal instruction handling. It can now handle recursive entry and uses a lot less memory than the old implementation. Performance-wise: * slightly fewer syscalls/sec are possible, though this could be optimized * much faster context creation / destruction, making overall operation faster. To generate a diff of this commit: cvs rdiff -u -r1.63 -r1.64 src/sys/arch/usermode/dev/cpu.c cvs rdiff -u -r1.16 -r1.17 src/sys/arch/usermode/include/pcb.h cvs rdiff -u -r1.1 -r1.2 src/sys/arch/usermode/target/i386/cpu_i386.c cvs rdiff -u -r1.1 -r1.2 src/sys/arch/usermode/target/x86_64/cpu_x86_64.c cvs rdiff -u -r1.6 -r1.7 src/sys/arch/usermode/usermode/copy.c cvs rdiff -u -r1.101 -r1.102 src/sys/arch/usermode/usermode/pmap.c cvs rdiff -u -r1.20 -r1.21 src/sys/arch/usermode/usermode/syscall.c cvs rdiff -u -r1.77 -r1.78 src/sys/arch/usermode/usermode/thunk.c cvs rdiff -u -r1.55 -r1.56 src/sys/arch/usermode/usermode/trap.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/usermode/dev/cpu.c diff -u src/sys/arch/usermode/dev/cpu.c:1.63 src/sys/arch/usermode/dev/cpu.c:1.64 --- src/sys/arch/usermode/dev/cpu.c:1.63 Thu Jan 12 13:28:54 2012 +++ src/sys/arch/usermode/dev/cpu.c Sat Jan 14 17:42:51 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.63 2012/01/12 13:28:54 reinoud Exp $ */ +/* $NetBSD: cpu.c,v 1.64 2012/01/14 17:42:51 reinoud Exp $ */ /*- * Copyright (c) 2007 Jared D. McNeill <jmcne...@invisible.ca> @@ -30,7 +30,7 @@ #include "opt_hz.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.63 2012/01/12 13:28:54 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.64 2012/01/14 17:42:51 reinoud Exp $"); #include <sys/param.h> #include <sys/conf.h> @@ -78,9 +78,13 @@ typedef struct cpu_softc { struct cpu_info *sc_ci; } cpu_softc_t; + +/* statics */ static struct pcb lwp0pcb; static void *um_msgbuf; + +/* attachment */ CFATTACH_DECL_NEW(cpu, sizeof(cpu_softc_t), cpu_match, cpu_attach, NULL, NULL); static int @@ -115,6 +119,8 @@ cpu_configure(void) spl0(); } + +/* main guts */ void cpu_reboot(int howto, char *bootstr) { @@ -226,7 +232,7 @@ cpu_getmcontext(struct lwp *l, mcontext_ { struct pcb *pcb = lwp_getpcb(l); ucontext_t *ucp = &pcb->pcb_userret_ucp; - + #ifdef CPU_DEBUG thunk_printf_debug("cpu_getmcontext\n"); #endif @@ -277,25 +283,7 @@ cpu_lwp_free2(struct lwp *l) if (pcb == NULL) return; - - if (pcb->pcb_stack_userland) { - free(pcb->pcb_stack_userland, M_TEMP); - pcb->pcb_stack_userland = NULL; - pcb->pcb_ucp.uc_stack.ss_sp = NULL; - pcb->pcb_ucp.uc_stack.ss_size = 0; - } - - if (pcb->pcb_stack_syscall) { - free(pcb->pcb_stack_syscall, M_TEMP); - pcb->pcb_syscall_ucp.uc_stack.ss_sp = NULL; - pcb->pcb_syscall_ucp.uc_stack.ss_size = 0; - } - - if (pcb->pcb_stack_pagefault) { - free(pcb->pcb_stack_pagefault, M_TEMP); - pcb->pcb_pagefault_ucp.uc_stack.ss_sp = NULL; - pcb->pcb_pagefault_ucp.uc_stack.ss_size = 0; - } + /* XXX nothing to do? 
*/ } static void @@ -332,42 +320,21 @@ cpu_lwp_fork(struct lwp *l1, struct lwp /* copy the PCB and its switchframes from parent */ memcpy(pcb2, pcb1, sizeof(struct pcb)); - stacksize = 2*PAGE_SIZE; - pcb2->pcb_stack_userland = malloc(stacksize, M_TEMP, M_WAITOK | M_ZERO); - pcb2->pcb_stack_syscall = malloc(stacksize, M_TEMP, M_WAITOK | M_ZERO); - pcb2->pcb_stack_pagefault = malloc(stacksize, M_TEMP, M_WAITOK | M_ZERO); - - KASSERT(pcb2->pcb_stack_userland); - KASSERT(pcb2->pcb_stack_syscall); - KASSERT(pcb2->pcb_stack_pagefault); - + /* refresh context */ if (thunk_getcontext(&pcb2->pcb_ucp)) panic("getcontext failed"); - /* set up the ucontext for the userland switch */ - pcb2->pcb_ucp.uc_stack.ss_sp = pcb2->pcb_stack_userland; - pcb2->pcb_ucp.uc_stack.ss_size = stacksize; + /* recalculate the system stack top */ + pcb2->sys_stack_top = pcb2->sys_stack + TRAPSTACKSIZE; + + /* get l2 its own stack */ + pcb2->pcb_ucp.uc_stack.ss_sp = pcb2->sys_stack; + pcb2->pcb_ucp.uc_stack.ss_size = pcb2->sys_stack_top - pcb2->sys_stack; pcb2->pcb_ucp.uc_flags = _UC_STACK | _UC_CPU; pcb2->pcb_ucp.uc_link = &pcb2->pcb_userret_ucp; thunk_makecontext(&pcb2->pcb_ucp, (void (*)(void)) cpu_lwp_trampoline, 3, &pcb2->pcb_ucp, func, arg); - - /* set up the ucontext for the syscall */ - pcb2->pcb_syscall_ucp.uc_stack.ss_sp = pcb2->pcb_stack_syscall; - pcb2->pcb_syscall_ucp.uc_stack.ss_size = stacksize; - pcb2->pcb_syscall_ucp.uc_flags = _UC_STACK | _UC_CPU; - pcb2->pcb_syscall_ucp.uc_link = &pcb2->pcb_userret_ucp; - thunk_makecontext(&pcb2->pcb_syscall_ucp, (void (*)(void)) syscall, - 0, NULL, NULL, NULL); - - /* set up the ucontext for the pagefault */ - pcb2->pcb_pagefault_ucp.uc_stack.ss_sp = pcb2->pcb_stack_pagefault; - pcb2->pcb_pagefault_ucp.uc_stack.ss_size = stacksize; - pcb2->pcb_pagefault_ucp.uc_flags = _UC_STACK | _UC_CPU; - pcb2->pcb_pagefault_ucp.uc_link = &pcb2->pcb_trapret_ucp; - thunk_makecontext(&pcb2->pcb_pagefault_ucp, (void (*)(void)) pagefault, - 0, NULL, NULL, NULL); } 
void @@ -382,7 +349,7 @@ void cpu_startup(void) { vaddr_t minaddr, maxaddr; - size_t stacksize, msgbufsize = 32 * 1024; + size_t msgbufsize = 32 * 1024; /* get ourself a message buffer */ um_msgbuf = kmem_zalloc(msgbufsize, KM_SLEEP); @@ -400,26 +367,12 @@ cpu_startup(void) /* init lwp0 */ memset(&lwp0pcb, 0, sizeof(lwp0pcb)); - if (thunk_getcontext(&lwp0pcb.pcb_ucp)) - panic("getcontext failed"); - uvm_lwp_setuarea(&lwp0, (vaddr_t)&lwp0pcb); + thunk_getcontext(&lwp0pcb.pcb_ucp); + uvm_lwp_setuarea(&lwp0, (vaddr_t) &lwp0pcb); + memcpy(&lwp0pcb.pcb_userret_ucp, &lwp0pcb.pcb_ucp, sizeof(ucontext_t)); - /* init trapframes (going nowhere!), maybe a panic func? */ - memcpy(&lwp0pcb.pcb_syscall_ucp, &lwp0pcb.pcb_ucp, sizeof(ucontext_t)); - memcpy(&lwp0pcb.pcb_userret_ucp, &lwp0pcb.pcb_ucp, sizeof(ucontext_t)); - memcpy(&lwp0pcb.pcb_pagefault_ucp, &lwp0pcb.pcb_ucp, sizeof(ucontext_t)); - memcpy(&lwp0pcb.pcb_trapret_ucp, &lwp0pcb.pcb_ucp, sizeof(ucontext_t)); - - /* set up the ucontext for the pagefault */ - stacksize = 2*PAGE_SIZE; - lwp0pcb.pcb_stack_pagefault = malloc(stacksize, M_TEMP, M_WAITOK | M_ZERO); - - lwp0pcb.pcb_pagefault_ucp.uc_stack.ss_sp = lwp0pcb.pcb_stack_pagefault; - lwp0pcb.pcb_pagefault_ucp.uc_stack.ss_size = stacksize; - lwp0pcb.pcb_pagefault_ucp.uc_flags = _UC_STACK | _UC_CPU; - lwp0pcb.pcb_pagefault_ucp.uc_link = &lwp0pcb.pcb_userret_ucp; - thunk_makecontext(&lwp0pcb.pcb_pagefault_ucp, (void (*)(void)) pagefault, - 0, NULL, NULL, NULL); + /* set stack top */ + lwp0pcb.sys_stack_top = lwp0pcb.sys_stack + TRAPSTACKSIZE; } void Index: src/sys/arch/usermode/include/pcb.h diff -u src/sys/arch/usermode/include/pcb.h:1.16 src/sys/arch/usermode/include/pcb.h:1.17 --- src/sys/arch/usermode/include/pcb.h:1.16 Fri Jan 6 12:54:59 2012 +++ src/sys/arch/usermode/include/pcb.h Sat Jan 14 17:42:51 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: pcb.h,v 1.16 2012/01/06 12:54:59 reinoud Exp $ */ +/* $NetBSD: pcb.h,v 1.17 2012/01/14 17:42:51 reinoud Exp $ */ /*- * Copyright (c) 
2007 Jared D. McNeill <jmcne...@invisible.ca> @@ -31,30 +31,20 @@ #include <sys/cdefs.h> #include <sys/ucontext.h> +#include <sys/queue.h> -/* - * Trap frame. Pushed onto the kernel stack on a trap (synchronous exception). - * XXX move to frame.h? - */ - -//typedef ucontext_t trapframe; +#define TRAPSTACKSIZE (USPACE -2*sizeof(ucontext_t) - 3*sizeof(register_t)) struct pcb { - void *pcb_stack_userland; - void *pcb_stack_syscall; - void *pcb_stack_pagefault; - - ucontext_t pcb_ucp; /* lwp switchframe */ - ucontext_t pcb_syscall_ucp; /* syscall context */ - ucontext_t pcb_userret_ucp; /* return to userland context */ - ucontext_t pcb_pagefault_ucp; /* pagefault context */ - ucontext_t pcb_trapret_ucp; - - void * pcb_onfault; /* on fault handler */ - - int pcb_errno; /* save/restore place */ - vaddr_t pcb_fault_addr; /* save place for fault addr */ - vaddr_t pcb_fault_pc; /* save place for fault PC */ + ucontext_t pcb_ucp; /* switchframe */ + ucontext_t pcb_userret_ucp; + + uint8_t *sys_stack_top; /* points at free point in sys_stack */ + uint8_t sys_stack[TRAPSTACKSIZE]; + + void *pcb_onfault; /* on fault handler */ + + int pcb_errno; /* save/restore place */ }; #endif /* !_ARCH_USERMODE_INCLUDE_PCB_H */ Index: src/sys/arch/usermode/target/i386/cpu_i386.c diff -u src/sys/arch/usermode/target/i386/cpu_i386.c:1.1 src/sys/arch/usermode/target/i386/cpu_i386.c:1.2 --- src/sys/arch/usermode/target/i386/cpu_i386.c:1.1 Sat Jan 7 20:07:01 2012 +++ src/sys/arch/usermode/target/i386/cpu_i386.c Sat Jan 14 17:42:52 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_i386.c,v 1.1 2012/01/07 20:07:01 reinoud Exp $ */ +/* $NetBSD: cpu_i386.c,v 1.2 2012/01/14 17:42:52 reinoud Exp $ */ /*- * Copyright (c) 2011 Reinoud Zandijk <rein...@netbsd.org> @@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu_i386.c,v 1.1 2012/01/07 20:07:01 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu_i386.c,v 1.2 2012/01/14 17:42:52 reinoud Exp $"); #include <sys/types.h> #include <sys/systm.h> 
@@ -55,6 +55,8 @@ __KERNEL_RCSID(0, "$NetBSD: cpu_i386.c,v #include <machine/machdep.h> #include <machine/thunk.h> +#include "opt_exec.h" + #if 0 static void dump_regs(register_t *reg);; @@ -108,16 +110,18 @@ sendsig_siginfo(const ksiginfo_t *ksi, c struct sigframe_siginfo *fp, frame; int sig = ksi->ksi_signo; sig_t catcher = SIGACTION(p, sig).sa_handler; - ucontext_t *ucp = &pcb->pcb_userret_ucp; - register_t *reg = (register_t *) &ucp->uc_mcontext; + ucontext_t *ucp; + register_t *reg; int onstack, error; KASSERT(mutex_owned(p->p_lock)); + ucp = &pcb->pcb_userret_ucp; + reg = (register_t *) &ucp->uc_mcontext; #if 0 - printf("%s: ", __func__); - printf("flags %d, ", (int) ksi->ksi_flags); - printf("to lwp %d, signo %d, code %d, errno %d\n", + thunk_printf("%s: ", __func__); + thunk_printf("flags %d, ", (int) ksi->ksi_flags); + thunk_printf("to lwp %d, signo %d, code %d, errno %d\n", (int) ksi->ksi_lid, ksi->ksi_signo, ksi->ksi_code, @@ -179,7 +183,7 @@ void setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack) { struct pcb *pcb = lwp_getpcb(l); - ucontext_t *ucp = &pcb->pcb_userret_ucp; + ucontext_t *ucp; uint *reg, i; #ifdef DEBUG_EXEC @@ -190,16 +194,15 @@ setregs(struct lwp *l, struct exec_packa pcb->pcb_ucp.uc_stack.ss_sp); printf("\tpcb->pcb_ucp.uc_stack.ss_size = %d\n", (int) pcb->pcb_ucp.uc_stack.ss_size); - printf("\tpcb->pcb_userret_ucp.uc_stack.ss_sp = %p\n", - pcb->pcb_userret_ucp.uc_stack.ss_sp); - printf("\tpcb->pcb_userret_ucp.uc_stack.ss_size = %d\n", - (int) pcb->pcb_userret_ucp.uc_stack.ss_size); #endif + /* set up the user context */ + ucp = &pcb->pcb_userret_ucp; reg = (int *) &ucp->uc_mcontext; for (i = 4; i < 11; i++) reg[i] = 0; + /* use given stack */ ucp->uc_stack.ss_sp = (void *) (stack-4); /* to prevent clearing */ ucp->uc_stack.ss_size = 0; //pack->ep_ssize; thunk_makecontext(ucp, (void *) pack->ep_entry, 0, NULL, NULL, NULL); @@ -216,10 +219,6 @@ setregs(struct lwp *l, struct exec_packa pcb->pcb_ucp.uc_stack.ss_sp); 
printf("\tpcb->pcb_ucp.uc_stack.ss_size = %d\n", (int) pcb->pcb_ucp.uc_stack.ss_size); - printf("\tpcb->pcb_userret_ucp.uc_stack.ss_sp = %p\n", - pcb->pcb_userret_ucp.uc_stack.ss_sp); - printf("\tpcb->pcb_userret_ucp.uc_stack.ss_size = %d\n", - (int) pcb->pcb_userret_ucp.uc_stack.ss_size); printf("\tpack->ep_entry = %p\n", (void *) pack->ep_entry); #endif @@ -274,6 +273,14 @@ md_get_pc(ucontext_t *ucp) return reg[14]; /* EIP */ } +register_t +md_get_sp(ucontext_t *ucp) +{ + register_t *reg = (register_t *) &ucp->uc_mcontext; + + return reg[17]; /* ESP */ +} + int md_syscall_check_opcode(ucontext_t *ucp) { Index: src/sys/arch/usermode/target/x86_64/cpu_x86_64.c diff -u src/sys/arch/usermode/target/x86_64/cpu_x86_64.c:1.1 src/sys/arch/usermode/target/x86_64/cpu_x86_64.c:1.2 --- src/sys/arch/usermode/target/x86_64/cpu_x86_64.c:1.1 Sat Jan 7 20:44:42 2012 +++ src/sys/arch/usermode/target/x86_64/cpu_x86_64.c Sat Jan 14 17:42:52 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_x86_64.c,v 1.1 2012/01/07 20:44:42 reinoud Exp $ */ +/* $NetBSD: cpu_x86_64.c,v 1.2 2012/01/14 17:42:52 reinoud Exp $ */ /*- * Copyright (c) 2011 Reinoud Zandijk <rein...@netbsd.org> @@ -35,7 +35,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu_x86_64.c,v 1.1 2012/01/07 20:44:42 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu_x86_64.c,v 1.2 2012/01/14 17:42:52 reinoud Exp $"); #include <sys/types.h> #include <sys/systm.h> @@ -110,13 +110,15 @@ sendsig_siginfo(const ksiginfo_t *ksi, c struct sigframe_siginfo *fp, frame; int sig = ksi->ksi_signo; sig_t catcher = SIGACTION(p, sig).sa_handler; - ucontext_t *ucp = &pcb->pcb_userret_ucp; - register_t *reg = (register_t *) &ucp->uc_mcontext; + ucontext_t *ucp; + register_t *reg; int onstack, error; char *sp; KASSERT(mutex_owned(p->p_lock)); + ucp = &pcb->pcb_userret_ucp; + reg = (register_t *) &ucp->uc_mcontext; #if 0 thunk_printf("%s: ", __func__); thunk_printf("flags %d, ", (int) ksi->ksi_flags); @@ -187,10 +189,12 @@ void setregs(struct lwp *l, 
struct exec_package *pack, vaddr_t stack) { struct pcb *pcb = lwp_getpcb(l); - ucontext_t *ucp = &pcb->pcb_userret_ucp; + ucontext_t *ucp; register_t *reg; int i; + /* set up the user context */ + ucp = &pcb->pcb_userret_ucp; reg = (register_t *) &ucp->uc_mcontext; for (i = 0; i < 15; i++) reg[i] = 0; @@ -199,6 +203,7 @@ setregs(struct lwp *l, struct exec_packa reg[21] = pack->ep_entry; /* RIP */ reg[24] = (register_t) stack; /* RSP */ + /* use given stack */ ucp->uc_stack.ss_sp = (void *) stack; ucp->uc_stack.ss_size = pack->ep_ssize; @@ -270,6 +275,14 @@ md_get_pc(ucontext_t *ucp) return reg[21]; /* RIP */ } +register_t +md_get_sp(ucontext_t *ucp) +{ + register_t *reg = (register_t *) &ucp->uc_mcontext; + + return reg[24]; /* RSP */ +} + int md_syscall_check_opcode(ucontext_t *ucp) { Index: src/sys/arch/usermode/usermode/copy.c diff -u src/sys/arch/usermode/usermode/copy.c:1.6 src/sys/arch/usermode/usermode/copy.c:1.7 --- src/sys/arch/usermode/usermode/copy.c:1.6 Fri Dec 9 17:24:25 2011 +++ src/sys/arch/usermode/usermode/copy.c Sat Jan 14 17:42:52 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: copy.c,v 1.6 2011/12/09 17:24:25 reinoud Exp $ */ +/* $NetBSD: copy.c,v 1.7 2012/01/14 17:42:52 reinoud Exp $ */ /*- * Copyright (c) 2007 Jared D. 
McNeill <jmcne...@invisible.ca> @@ -27,10 +27,11 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: copy.c,v 1.6 2011/12/09 17:24:25 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: copy.c,v 1.7 2012/01/14 17:42:52 reinoud Exp $"); #include <sys/types.h> #include <sys/systm.h> +#include <machine/thunk.h> /* XXX until strnlen(3) has been added to the kernel, we *could* panic on it */ #define strnlen(str, maxlen) min(strlen((str)), maxlen) @@ -38,7 +39,7 @@ __KERNEL_RCSID(0, "$NetBSD: copy.c,v 1.6 int copyin(const void *uaddr, void *kaddr, size_t len) { -// aprint_debug("copyin uaddr %p, kaddr %p, len %d\n", uaddr, kaddr, (int) len); +// thunk_printf("copyin uaddr %p, kaddr %p, len %d\n", uaddr, kaddr, (int) len); memcpy(kaddr, uaddr, len); return 0; } @@ -46,7 +47,7 @@ copyin(const void *uaddr, void *kaddr, s int copyout(const void *kaddr, void *uaddr, size_t len) { -// aprint_debug("copyout kaddr %p, uaddr %p, len %d\n", kaddr, uaddr, (int) len); +// thunk_printf("copyout kaddr %p, uaddr %p, len %d\n", kaddr, uaddr, (int) len); memcpy(uaddr, kaddr, len); return 0; } Index: src/sys/arch/usermode/usermode/pmap.c diff -u src/sys/arch/usermode/usermode/pmap.c:1.101 src/sys/arch/usermode/usermode/pmap.c:1.102 --- src/sys/arch/usermode/usermode/pmap.c:1.101 Tue Jan 10 12:07:17 2012 +++ src/sys/arch/usermode/usermode/pmap.c Sat Jan 14 17:42:52 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.101 2012/01/10 12:07:17 reinoud Exp $ */ +/* $NetBSD: pmap.c,v 1.102 2012/01/14 17:42:52 reinoud Exp $ */ /*- * Copyright (c) 2011 Reinoud Zandijk <rein...@netbsd.org> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.101 2012/01/10 12:07:17 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.102 2012/01/14 17:42:52 reinoud Exp $"); #include "opt_memsize.h" #include "opt_kmempages.h" @@ -41,6 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.1 #include <sys/pool.h> #include <machine/thunk.h> #include <machine/machdep.h> +#include <machine/pcb.h> 
#include <uvm/uvm.h> @@ -194,6 +195,16 @@ pmap_bootstrap(void) aprint_verbose("\n\n"); + /* make critical assertions before modifying anything */ + if (sizeof(struct pcb) > USPACE) { + panic("sizeof(struct pcb) is %d bytes too big for USPACE. " + "Please adjust TRAPSTACKSIZE calculation", + (int) (USPACE - sizeof(struct pcb))); + } + if (TRAPSTACKSIZE < 4*PAGE_SIZE) { + panic("TRAPSTACKSIZE is too small, please increase UPAGES"); + } + /* protect user memory UVM area (---) */ err = thunk_munmap((void *) kmem_user_start, kmem_k_start - kmem_user_start); @@ -625,11 +636,8 @@ pmap_lookup_pv(pmap_t pmap, uintptr_t lp int l1 = lpn / PMAP_L2_NENTRY; int l2 = lpn % PMAP_L2_NENTRY; -#ifdef DIAGNOSTIC if (lpn >= pm_nentries) - panic("peeing outside box : addr in page around %"PRIx64"\n", - (uint64_t) lpn*PAGE_SIZE); -#endif + return NULL; l2tbl = pmap->pm_l1[l1]; if (l2tbl) Index: src/sys/arch/usermode/usermode/syscall.c diff -u src/sys/arch/usermode/usermode/syscall.c:1.20 src/sys/arch/usermode/usermode/syscall.c:1.21 --- src/sys/arch/usermode/usermode/syscall.c:1.20 Tue Jan 3 12:05:00 2012 +++ src/sys/arch/usermode/usermode/syscall.c Sat Jan 14 17:42:52 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: syscall.c,v 1.20 2012/01/03 12:05:00 reinoud Exp $ */ +/* $NetBSD: syscall.c,v 1.21 2012/01/14 17:42:52 reinoud Exp $ */ /*- * Copyright (c) 2007 Jared D. McNeill <jmcne...@invisible.ca> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.20 2012/01/03 12:05:00 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.21 2012/01/14 17:42:52 reinoud Exp $"); #include <sys/types.h> #include <sys/param.h> @@ -61,11 +61,12 @@ child_return(void *arg) lwp_t *l = arg; register_t rval[2]; struct pcb *pcb = lwp_getpcb(l); + ucontext_t *ucp = &pcb->pcb_userret_ucp; /* return value zero */ rval[0] = 0; rval[1] = 0; - md_syscall_set_returnargs(l, &pcb->pcb_userret_ucp, 0, rval); + md_syscall_set_returnargs(l, ucp, 0, rval); aprint_debug("child return! 
lwp %p\n", l); userret(l); @@ -99,8 +100,6 @@ syscall(void) curcpu()->ci_data.cpu_nsyscall++; LWP_CACHE_CREDS(l, l->l_proc); - /* TODO are we allowed to execute system calls in this memory space? */ - /* XXX do we want do do emulation? */ md_syscall_get_opcode(ucp, &opcode); md_syscall_get_syscallnumber(ucp, &code); @@ -161,7 +160,7 @@ syscall(void) break; } //thunk_printf_debug("end of syscall : return to userland\n"); -//if (code != 4) printf("userret() code %d\n", code); +//if (code != 4) thunk_printf("userret() code %d\n", code); userret(l); } @@ -173,54 +172,54 @@ syscall_args_print(lwp_t *l, int code, i return; if (code != 4) { - printf("lwp %p, code %3d, nargs %d, argsize %3d\t%s(", + thunk_printf("lwp %p, code %3d, nargs %d, argsize %3d\t%s(", l, code, nargs, argsize, syscallnames[code]); switch (code) { case 5: - printf("\"%s\", %"PRIx32", %"PRIx32"", (char *) (args[0]), (uint) args[1], (uint) args[2]); + thunk_printf("\"%s\", %"PRIx32", %"PRIx32"", (char *) (args[0]), (uint) args[1], (uint) args[2]); break; case 33: - printf("\"%s\", %"PRIx32"", (char *) (args[0]), (uint) args[1]); + thunk_printf("\"%s\", %"PRIx32"", (char *) (args[0]), (uint) args[1]); break; case 50: - printf("\"%s\"", (char *) (args[0])); + thunk_printf("\"%s\"", (char *) (args[0])); break; case 58: - printf("\"%s\", %"PRIx32", %"PRIx32"", (char *) (args[0]), (uint) (args[1]), (uint) args[2]); + thunk_printf("\"%s\", %"PRIx32", %"PRIx32"", (char *) (args[0]), (uint) (args[1]), (uint) args[2]); break; case 59: - printf("\"%s\", [", (char *) (args[0])); + thunk_printf("\"%s\", [", (char *) (args[0])); argv = (char **) (args[1]); if (*argv) { while (*argv) { - printf("\"%s\", ", *argv); + thunk_printf("\"%s\", ", *argv); argv++; } - printf("\b\b"); + thunk_printf("\b\b"); } - printf("], ["); + thunk_printf("], ["); envp = (char **) (args[2]); if (*envp) { while (*envp) { - printf("\"%s\", ", *envp); + thunk_printf("\"%s\", ", *envp); envp++; } - printf("\b\b"); + thunk_printf("\b\b"); } - 
printf("]"); + thunk_printf("]"); break; default: for (int i = 0; i < nargs; i++) - printf("%"PRIx32", ", (uint) args[i]); + thunk_printf("%"PRIx32", ", (uint) args[i]); if (nargs) - printf("\b\b"); + thunk_printf("\b\b"); } - printf(") "); + thunk_printf(") "); } #if 0 if ((code == 4)) { // thunk_printf_debug("[us] %s", (char *) args[1]); - printf("[us] %s", (char *) args[1]); + thunk_printf("[us] %s", (char *) args[1]); } #endif } @@ -243,7 +242,7 @@ return; errstr = "OK"; } if (code != 4) - printf("=> %s: %d, (%"PRIx32", %"PRIx32")\n", + thunk_printf("=> %s: %d, (%"PRIx32", %"PRIx32")\n", errstr, error, (uint) (rval[0]), (uint) (rval[1])); } Index: src/sys/arch/usermode/usermode/thunk.c diff -u src/sys/arch/usermode/usermode/thunk.c:1.77 src/sys/arch/usermode/usermode/thunk.c:1.78 --- src/sys/arch/usermode/usermode/thunk.c:1.77 Wed Jan 11 12:37:50 2012 +++ src/sys/arch/usermode/usermode/thunk.c Sat Jan 14 17:42:52 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: thunk.c,v 1.77 2012/01/11 12:37:50 reinoud Exp $ */ +/* $NetBSD: thunk.c,v 1.78 2012/01/14 17:42:52 reinoud Exp $ */ /*- * Copyright (c) 2011 Jared D. 
McNeill <jmcne...@invisible.ca> @@ -28,7 +28,7 @@ #include <sys/cdefs.h> #ifdef __NetBSD__ -__RCSID("$NetBSD: thunk.c,v 1.77 2012/01/11 12:37:50 reinoud Exp $"); +__RCSID("$NetBSD: thunk.c,v 1.78 2012/01/14 17:42:52 reinoud Exp $"); #endif #include <sys/types.h> @@ -960,7 +960,7 @@ thunk_rfb_open(thunk_rfb_t *rfb, uint16_ } static ssize_t -safe_send(int s, const void *msg, size_t len) +safe_send(int s, const void *msg, int len) { const uint8_t *p; ssize_t sent_len; @@ -982,7 +982,7 @@ safe_send(int s, const void *msg, size_t } static ssize_t -safe_recv(int s, void *buf, size_t len) +safe_recv(int s, void *buf, int len) { uint8_t *p; int recv_len; Index: src/sys/arch/usermode/usermode/trap.c diff -u src/sys/arch/usermode/usermode/trap.c:1.55 src/sys/arch/usermode/usermode/trap.c:1.56 --- src/sys/arch/usermode/usermode/trap.c:1.55 Wed Jan 11 12:40:26 2012 +++ src/sys/arch/usermode/usermode/trap.c Sat Jan 14 17:42:52 2012 @@ -1,4 +1,4 @@ -/* $NetBSD: trap.c,v 1.55 2012/01/11 12:40:26 reinoud Exp $ */ +/* $NetBSD: trap.c,v 1.56 2012/01/14 17:42:52 reinoud Exp $ */ /*- * Copyright (c) 2011 Reinoud Zandijk <rein...@netbsd.org> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.55 2012/01/11 12:40:26 reinoud Exp $"); +__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.56 2012/01/14 17:42:52 reinoud Exp $"); #include <sys/types.h> #include <sys/param.h> @@ -35,6 +35,7 @@ __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.5 #include <sys/proc.h> #include <sys/systm.h> #include <sys/userret.h> +#include <sys/kauth.h> #include <sys/errno.h> #include <uvm/uvm_extern.h> @@ -54,6 +55,9 @@ static void mem_access_handler(int sig, static void illegal_instruction_handler(int sig, siginfo_t *info, void *ctx); extern int errno; +static void pagefault(vaddr_t pc, vaddr_t va); +static void illegal_instruction(void); + bool pmap_fault(pmap_t pmap, vaddr_t va, vm_prot_t *atype); static stack_t sigstk; @@ -79,7 +83,8 @@ setup_signal_handlers(void) panic("can't set alternate 
stacksize: %d", thunk_geterrno()); - sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK; + /* SIGBUS and SIGSEGV need to be reentrant hence the SA_NODEFER */ + sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK | SA_NODEFER; sa.sa_sigaction = mem_access_handler; thunk_sigemptyset(&sa.sa_mask); if (thunk_sigaction(SIGSEGV, &sa, NULL) == -1) @@ -113,13 +118,14 @@ stop_all_signal_handlers(void) } +/* signal handler switching to a pagefault context */ static void mem_access_handler(int sig, siginfo_t *info, void *ctx) { - ucontext_t *uct = ctx; + ucontext_t *ucp = ctx; struct lwp *l; struct pcb *pcb; - vaddr_t va, pc; + vaddr_t va, sp, pc, fp; assert((info->si_signo == SIGSEGV) || (info->si_signo == SIGBUS)); @@ -156,29 +162,55 @@ mem_access_handler(int sig, siginfo_t *i /* get PC address of faulted memory instruction */ pc = md_get_pc(ctx); + /* setup for pagefault context */ + sp = md_get_sp(ctx); + #if 0 - thunk_printf("memaccess error : pc = %p, va = %p\n", - (void *) pc, (void *) va); + printf("memaccess error, pc %p, va %p, " + "sys_stack %p, sp %p, stack top %p\n", + (void *) pc, (void *) va, + (void *) pcb->sys_stack, (void *) sp, + (void *) pcb->sys_stack_top); #endif - /* copy this state to return to */ - memcpy(&pcb->pcb_trapret_ucp, uct, sizeof(ucontext_t)); + /* if we're running on a stack of our own, use the system stack */ + if ((sp < (vaddr_t) pcb->sys_stack) || (sp > (vaddr_t) pcb->sys_stack_top)) { + sp = (vaddr_t) pcb->sys_stack_top - sizeof(register_t); + fp = (vaddr_t) &pcb->pcb_userret_ucp; + } else { + /* stack grows down */ + fp = sp - sizeof(ucontext_t) - sizeof(register_t); /* slack */ + sp = fp - sizeof(register_t); /* slack */ + + /* sanity check before copying */ + if (fp - 2*PAGE_SIZE < (vaddr_t) pcb->sys_stack) + panic("%s: out of system stack", __func__); + } + + memcpy((void *) fp, ucp, sizeof(ucontext_t)); - /* remember our parameters */ - pcb->pcb_fault_addr = va; - pcb->pcb_fault_pc = pc; + /* create context for pagefault */ + 
pcb->pcb_ucp.uc_stack.ss_sp = (void *) pcb->sys_stack; + pcb->pcb_ucp.uc_stack.ss_size = sp - (vaddr_t) pcb->sys_stack; + pcb->pcb_ucp.uc_link = (void *) fp; /* link to old frame on stack */ + + pcb->pcb_ucp.uc_flags = _UC_STACK | _UC_CPU; + thunk_makecontext(&pcb->pcb_ucp, (void (*)(void)) pagefault, + 2, (void *) pc, (void *) va, NULL); - /* switch to the pagefault entry on return from signal */ - memcpy(uct, &pcb->pcb_pagefault_ucp, sizeof(ucontext_t)); + /* switch to the new pagefault entry on return from signal */ + memcpy(ctx, &pcb->pcb_ucp, sizeof(ucontext_t)); } +/* signal handler switching to a illegal instruction context */ static void illegal_instruction_handler(int sig, siginfo_t *info, void *ctx) { - ucontext_t *uct = ctx; + ucontext_t *ucp = ctx; struct lwp *l; struct pcb *pcb; + vaddr_t sp, fp; assert(info->si_signo == SIGILL); #if 0 @@ -214,27 +246,49 @@ illegal_instruction_handler(int sig, sig l = curlwp; pcb = lwp_getpcb(l); - /* copy this state to return to */ - memcpy(&pcb->pcb_userret_ucp, uct, sizeof(ucontext_t)); + /* setup for illegal_instruction context */ + sp = md_get_sp(ctx); - /* if its a syscall ... 
*/ - if (md_syscall_check_opcode(uct)) { - /* switch to the syscall entry on return from signal */ - memcpy(uct, &pcb->pcb_syscall_ucp, sizeof(ucontext_t)); - return; + /* if we're running on a stack of our own, use the system stack */ + if ((sp < (vaddr_t) pcb->sys_stack) || + (sp > (vaddr_t) pcb->sys_stack_top)) { + sp = (vaddr_t) pcb->sys_stack_top - sizeof(register_t); + fp = (vaddr_t) &pcb->pcb_userret_ucp; + } else { + panic("illegal instruction inside kernel?"); +#if 0 + /* stack grows down */ + fp = sp - sizeof(ucontext_t) - sizeof(register_t); /* slack */ + sp = fp - sizeof(register_t); /* slack */ + + /* sanity check before copying */ + if (fp - 2*PAGE_SIZE < (vaddr_t) pcb->sys_stack) + panic("%s: out of system stack", __func__); +#endif } - panic("should deliver a trap to the process : illegal instruction " - "encountered\n"); + memcpy((void *) fp, ucp, sizeof(ucontext_t)); + + /* create context for illegal instruction */ + pcb->pcb_ucp.uc_stack.ss_sp = (void *) pcb->sys_stack; + pcb->pcb_ucp.uc_stack.ss_size = sp - (vaddr_t) pcb->sys_stack; + pcb->pcb_ucp.uc_link = (void *) fp; /* link to old frame on stack */ + + pcb->pcb_ucp.uc_flags = _UC_STACK | _UC_CPU; + thunk_makecontext(&pcb->pcb_ucp, (void (*)(void)) illegal_instruction, + 0, NULL, NULL, NULL); + + /* switch to the new illegal instruction entry on return from signal */ + memcpy(ctx, &pcb->pcb_ucp, sizeof(ucontext_t)); } /* - * Entry point from the segv handler; check if its a pmap reference fault or - * let uvm handle it. + * Context for handing page faults from the sigsegv handler; check if its a + * pmap reference fault or let uvm handle it. 
*/ -void -pagefault(void) +static void +pagefault(vaddr_t pc, vaddr_t va) { struct proc *p; struct lwp *l; @@ -242,16 +296,14 @@ pagefault(void) struct vmspace *vm; struct vm_map *vm_map; vm_prot_t atype; - vaddr_t va, pc; void *onfault; - int from_kernel, lwp_errno, rv; + int from_kernel, lwp_errno, error; + ksiginfo_t ksi; - l = curlwp; + l = curlwp; KASSERT(l); pcb = lwp_getpcb(l); p = l->l_proc; vm = p->p_vmspace; - va = pcb->pcb_fault_addr; - pc = pcb->pcb_fault_pc; lwp_errno = thunk_geterrno(); @@ -261,53 +313,120 @@ pagefault(void) vm_map = kernel_map; #if 0 - thunk_printf_debug("pagefault : pc %p, va %p\n", + thunk_printf("pagefault : pc %p, va %p\n", (void *) pc, (void *) va); #endif /* can pmap handle it? on its own? (r/m) */ + if (pmap_fault(vm_map->pmap, va, &atype)) { +// thunk_printf("pagefault leave (pmap)\n"); + goto out; + } + + /* ask UVM */ + thunk_printf_debug("pmap fault couldn't handle it! : " + "derived atype %d\n", atype); + onfault = pcb->pcb_onfault; - rv = 0; - if (!pmap_fault(vm_map->pmap, va, &atype)) { - thunk_printf_debug("pmap fault couldn't handle it! : " - "derived atype %d\n", atype); - - /* extra debug for now */ - if (pcb == 0) - panic("NULL pcb!\n"); - - pcb->pcb_onfault = NULL; - rv = uvm_fault(vm_map, va, atype); - pcb->pcb_onfault = onfault; + pcb->pcb_onfault = NULL; + error = uvm_fault(vm_map, va, atype); + pcb->pcb_onfault = onfault; + + if (vm_map != kernel_map) { + if (error == 0) + uvm_grow(l->l_proc, va); } + if (error == EACCES) + error = EFAULT; - if (rv) { - thunk_printf_debug("uvm_fault returned error %d\n", rv); + /* if uvm handled it, return */ + if (error == 0) { +// thunk_printf("pagefault leave (uvm)\n"); + goto out; + } - /* something got wrong */ - if (from_kernel) { - /* copyin / copyout */ - if (!onfault) - panic("kernel fault"); - panic("%s: can't call onfault yet\n", __func__); - /* XXX implement me ? 
*/ - /* jump to given onfault */ - // tf = &kernel_tf; - // memset(tf, 0, sizeof(struct trapframe)); - // tf->tf_pc = onfault; - // tf->tf_io[0] = (rv == EACCES) ? EFAULT : rv; - return; - } - panic("%s: should deliver a trap to the process for va %p", __func__, (void *) va); - /* XXX HOWTO see arm/arm/syscall.c illegal instruction signal */ + /* something got wrong */ + thunk_printf("%s: uvm fault %d, pc %p, from_kernel %d\n", + __func__, error, (void *) pc, from_kernel); + + /* check if its from copyin/copyout */ + if (onfault) { + panic("%s: can't call onfault yet\n", __func__); + /* XXX implement me ? */ + /* jump to given onfault */ + // tf = &kernel_tf; + // memset(tf, 0, sizeof(struct trapframe)); + // tf->tf_pc = onfault; + // tf->tf_io[0] = (rv == EACCES) ? EFAULT : rv; + goto out; } + if (from_kernel) + panic("Unhandled page fault in kernel mode"); + + /* send signal */ + thunk_printf("giving signal to userland\n"); + + KSI_INIT_TRAP(&ksi); + ksi.ksi_signo = SIGSEGV; + ksi.ksi_trap = 0; /* XXX */ + ksi.ksi_code = (error == EPERM) ? SEGV_ACCERR : SEGV_MAPERR; + ksi.ksi_addr = (void *) va; + + if (error == ENOMEM) { + printf("UVM: pid %d.%d (%s), uid %d killed: " + "out of swap\n", + p->p_pid, l->l_lid, p->p_comm, + l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1); + ksi.ksi_signo = SIGKILL; + } + +#if 0 + p->p_emul->e_trapsignal(l, &ksi); +#else + trapsignal(l, &ksi); +#endif + mi_userret(l); + +// thunk_printf("pagefault leave\n"); +out: thunk_seterrno(lwp_errno); pcb->pcb_errno = lwp_errno; } -stack_t * -usermode_signal_stack(void) +/* + * Context for handing illegal instruction from the sigill handler + */ +static void +illegal_instruction(void) { - return &sigstk; + struct lwp *l = curlwp; + struct pcb *pcb = lwp_getpcb(l); + ucontext_t *ucp = &pcb->pcb_userret_ucp; + ksiginfo_t ksi; + +// thunk_printf("illegal instruction\n"); + /* if its a syscall ... 
*/ + if (md_syscall_check_opcode(ucp)) { + syscall(); +// thunk_printf("illegal instruction leave\n"); + return; + } + + thunk_printf("%s: giving SIGILL (TRAP)\n", __func__); + + KSI_INIT_TRAP(&ksi); + ksi.ksi_signo = SIGILL; + ksi.ksi_trap = 0; /* XXX */ + ksi.ksi_errno = 0; // info->si_errno; + ksi.ksi_code = 0; // info->si_code; + ksi.ksi_addr = (void *) md_get_pc(ucp); /* only relyable source */ + +#if 0 + p->p_emul->e_trapsignal(l, &ksi); +#else + trapsignal(l, &ksi); +#endif + mi_userret(l); } +