Module Name:    src
Committed By:   pooka
Date:           Thu Jun  3 10:56:20 UTC 2010
Modified Files:
        src/sys/rump/librump/rumpkern: locks_up.c memalloc.c rump.c
            rump_private.h vm.c

Log Message:
Implement a sort-of pagedaemon: adjust all memory allocators to go
through an in-rumpkernel hypermemory allocator, which knows it should
kick the pagedaemon and block in case a ``waitok'' memory allocation
fails.  This allows us to recover from some out-of-memory situations.
Realworld'istically speaking (as opposed to whatever "should be"
theory), these OOM situations will happen extremely rarely, if ever,
when our hypervisor is a regular process.  Speculatively, this should
be useful for other types of hosts.

Issues remaining:
  * the hypervisor does not know how to reclaim kernel memory (and for
    the reason stated above, I'm not sure it makes sense to teach the
    current implementation about that)
  * vfs memory (buffers, vm object pages, etc.) is not reclaimed


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/rump/librump/rumpkern/locks_up.c
cvs rdiff -u -r1.6 -r1.7 src/sys/rump/librump/rumpkern/memalloc.c
cvs rdiff -u -r1.174 -r1.175 src/sys/rump/librump/rumpkern/rump.c
cvs rdiff -u -r1.49 -r1.50 src/sys/rump/librump/rumpkern/rump_private.h
cvs rdiff -u -r1.79 -r1.80 src/sys/rump/librump/rumpkern/vm.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
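For illustration: the allocate/kick/block cycle described in the log
message can be modeled in userspace roughly as in the sketch below.
The pthread primitives and all names here (hypermalloc, pagedaemon,
host pool_bytes) are stand-ins invented for this sketch, not rump
interfaces; the real code is rump_hypermalloc(), uvm_wait() and
uvm_pageout() in the vm.c diff further down.

/* Userspace model of the waitok/pagedaemon handshake (illustrative only). */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t pdmtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t pdcv = PTHREAD_COND_INITIALIZER;  /* wakes the daemon */
static pthread_cond_t oomcv = PTHREAD_COND_INITIALIZER; /* wakes blocked allocators */
static unsigned int waiters;
static size_t pool_bytes = 4096;        /* pretend-reclaimable hypervisor memory */

/* Model of the pagedaemon main loop: reclaim something, wake the waiters. */
static void *
pagedaemon(void *arg)
{

        pthread_mutex_lock(&pdmtx);
        for (;;) {
                while (waiters == 0)
                        pthread_cond_wait(&pdcv, &pdmtx);
                pool_bytes += 65536;            /* stands in for draining pools */
                waiters = 0;
                pthread_cond_broadcast(&oomcv);
        }
        /* NOTREACHED */
}

/* Model of a waitok-aware hypermemory allocator: kick, block, retry. */
static void *
hypermalloc(size_t len, bool waitok)
{
        void *rv;

        pthread_mutex_lock(&pdmtx);
        for (;;) {
                if (pool_bytes >= len) {        /* "host allocation succeeded" */
                        pool_bytes -= len;
                        rv = malloc(len);
                        break;
                }
                if (!waitok) {                  /* non-sleeping caller: just fail */
                        rv = NULL;
                        break;
                }
                waiters++;
                pthread_cond_signal(&pdcv);        /* kick the daemon ... */
                pthread_cond_wait(&oomcv, &pdmtx); /* ... block until it reclaims */
        }
        pthread_mutex_unlock(&pdmtx);
        return rv;
}

int
main(void)
{
        pthread_t pt;

        pthread_create(&pt, NULL, pagedaemon, NULL);
        /* Request more than is initially "free" to force one reclaim cycle. */
        printf("got %p\n", hypermalloc(32768, true));
        return 0;
}

The property this models, and which the kernel-side code below also
has, is that a failed ``waitok'' allocation never returns NULL: it
signals the reclaimer, sleeps until woken, and then retries.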
Modified files:

Index: src/sys/rump/librump/rumpkern/locks_up.c
diff -u src/sys/rump/librump/rumpkern/locks_up.c:1.2 src/sys/rump/librump/rumpkern/locks_up.c:1.3
--- src/sys/rump/librump/rumpkern/locks_up.c:1.2        Tue Jun  1 20:11:33 2010
+++ src/sys/rump/librump/rumpkern/locks_up.c     Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*      $NetBSD: locks_up.c,v 1.2 2010/06/01 20:11:33 pooka Exp $       */
+/*      $NetBSD: locks_up.c,v 1.3 2010/06/03 10:56:20 pooka Exp $       */
 
 /*
  * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: locks_up.c,v 1.2 2010/06/01 20:11:33 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locks_up.c,v 1.3 2010/06/03 10:56:20 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -74,7 +74,7 @@
         * XXX: pool_cache would be nice, but not easily possible,
         * as pool cache init wants to call mutex_init() ...
         */
-       upm = rumpuser_malloc(sizeof(*upm), 0);
+       upm = rump_hypermalloc(sizeof(*upm), 0, true, "mutex_init");
        memset(upm, 0, sizeof(*upm));
        rumpuser_cv_init(&upm->upm_rucv);
        memcpy(mtx, &upm, sizeof(void *));
@@ -178,7 +178,7 @@
        CTASSERT(sizeof(krwlock_t) >= sizeof(void *));
        checkncpu();
 
-       uprw = rumpuser_malloc(sizeof(*uprw), 0);
+       uprw = rump_hypermalloc(sizeof(*uprw), 0, true, "rwinit");
        memset(uprw, 0, sizeof(*uprw));
        rumpuser_cv_init(&uprw->uprw_rucv_reader);
        rumpuser_cv_init(&uprw->uprw_rucv_writer);

Index: src/sys/rump/librump/rumpkern/memalloc.c
diff -u src/sys/rump/librump/rumpkern/memalloc.c:1.6 src/sys/rump/librump/rumpkern/memalloc.c:1.7
--- src/sys/rump/librump/rumpkern/memalloc.c:1.6        Tue Jun  1 20:11:33 2010
+++ src/sys/rump/librump/rumpkern/memalloc.c     Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*      $NetBSD: memalloc.c,v 1.6 2010/06/01 20:11:33 pooka Exp $       */
+/*      $NetBSD: memalloc.c,v 1.7 2010/06/03 10:56:20 pooka Exp $       */
 
 /*
  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: memalloc.c,v 1.6 2010/06/01 20:11:33 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: memalloc.c,v 1.7 2010/06/03 10:56:20 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/kmem.h>
@@ -73,7 +73,7 @@
 {
        void *rv;
 
-       rv = rumpuser_malloc(size, 0);
+       rv = rump_hypermalloc(size, 0, (flags & M_WAITOK) != 0, "malloc");
        if (rv && flags & M_ZERO)
                memset(rv, 0, size);
 
@@ -110,7 +110,7 @@
 kmem_alloc(size_t size, km_flag_t kmflag)
 {
 
-       return rumpuser_malloc(size, 0);
+       return rump_hypermalloc(size, 0, kmflag == KM_SLEEP, "kmem_alloc");
 }
 
 void *
@@ -240,18 +240,14 @@
 void *
 pool_get(struct pool *pp, int flags)
 {
-       void *rv;
 
 #ifdef DIAGNOSTIC
        if (pp->pr_size == 0)
                panic("%s: pool unit size 0.  not initialized?", __func__);
 #endif
-       rv = rumpuser_malloc(pp->pr_size, pp->pr_align);
-       if (rv == NULL && (flags & PR_WAITOK && (flags & PR_LIMITFAIL) == 0))
-               panic("%s: out of memory and PR_WAITOK", __func__);
-
-       return rv;
+       return rump_hypermalloc(pp->pr_size, pp->pr_align,
+           (flags & PR_WAITOK) != 0, "pget");
 }
 
 void
@@ -299,6 +295,20 @@
        pc->pc_pool.pr_drain_hook_arg = arg;
 }
 
+void
+pool_drain_start(struct pool **ppp, uint64_t *wp)
+{
+
+       /* nada */
+}
+
+void
+pool_drain_end(struct pool *pp, uint64_t w)
+{
+
+       /* nada again */
+}
+
 int
 pool_prime(struct pool *pp, int nitems)
 {

Index: src/sys/rump/librump/rumpkern/rump.c
diff -u src/sys/rump/librump/rumpkern/rump.c:1.174 src/sys/rump/librump/rumpkern/rump.c:1.175
--- src/sys/rump/librump/rumpkern/rump.c:1.174  Wed Jun  2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/rump.c        Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*      $NetBSD: rump.c,v 1.174 2010/06/02 10:55:18 pooka Exp $ */
+/*      $NetBSD: rump.c,v 1.175 2010/06/03 10:56:20 pooka Exp $ */
 
 /*
  * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
@@ -28,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.174 2010/06/02 10:55:18 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.175 2010/06/03 10:56:20 pooka Exp $");
 
 #include <sys/systm.h>
 #define ELFSIZE ARCH_ELFSIZE
@@ -49,6 +49,7 @@
 #include <sys/kernel.h>
 #include <sys/kmem.h>
 #include <sys/kprintf.h>
+#include <sys/kthread.h>
 #include <sys/ksyms.h>
 #include <sys/msgbuf.h>
 #include <sys/module.h>
@@ -357,6 +358,15 @@
        pipe_init();
        resource_init();
 
+       /* start page baroness */
+       if (rump_threads) {
+               if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL,
+                   uvm_pageout, NULL, &uvm.pagedaemon_lwp, "pdaemon") != 0)
+                       panic("pagedaemon create failed");
+       } else
+               uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
+
+       /* process dso's */
        rumpuser_dl_bootstrap(add_linkedin_modules, rump_kernelfsym_load);
 
        /* these do nothing if not present */

Index: src/sys/rump/librump/rumpkern/rump_private.h
diff -u src/sys/rump/librump/rumpkern/rump_private.h:1.49 src/sys/rump/librump/rumpkern/rump_private.h:1.50
--- src/sys/rump/librump/rumpkern/rump_private.h:1.49   Wed Jun  2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/rump_private.h        Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*      $NetBSD: rump_private.h,v 1.49 2010/06/02 10:55:18 pooka Exp $  */
+/*      $NetBSD: rump_private.h,v 1.50 2010/06/03 10:56:20 pooka Exp $  */
 
 /*
  * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
@@ -129,4 +129,6 @@
 void   rump_intr_init(void);
 void   rump_softint_run(struct cpu_info *);
 
+void   *rump_hypermalloc(size_t, int, bool, const char *);
+
 #endif /* _SYS_RUMP_PRIVATE_H_ */

Index: src/sys/rump/librump/rumpkern/vm.c
diff -u src/sys/rump/librump/rumpkern/vm.c:1.79 src/sys/rump/librump/rumpkern/vm.c:1.80
--- src/sys/rump/librump/rumpkern/vm.c:1.79     Wed Jun  2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/vm.c  Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*      $NetBSD: vm.c,v 1.79 2010/06/02 10:55:18 pooka Exp $    */
+/*      $NetBSD: vm.c,v 1.80 2010/06/03 10:56:20 pooka Exp $    */
 
 /*
  * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
@@ -43,15 +43,16 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.79 2010/06/02 10:55:18 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.80 2010/06/03 10:56:20 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
+#include <sys/buf.h>
+#include <sys/kernel.h>
 #include <sys/kmem.h>
 #include <sys/mman.h>
 #include <sys/null.h>
 #include <sys/vnode.h>
-#include <sys/buf.h>
 
 #include <machine/pmap.h>
 
@@ -87,6 +88,10 @@
 static struct vm_map_kernel kernel_map_store;
 struct vm_map *kernel_map = &kernel_map_store.vmk_map;
 
+static unsigned int pdaemon_waiters;
+static kmutex_t pdaemonmtx;
+static kcondvar_t pdaemoncv, oomwait;
+
 /*
  * vm pages
  */
@@ -233,12 +238,15 @@
 {
 
        uvmexp.free = 1024*1024; /* XXX */
-       uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
        rump_vmspace.vm_map.pmap = pmap_kernel();
 
        mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
        mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
+       mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0);
+       cv_init(&pdaemoncv, "pdaemon");
+       cv_init(&oomwait, "oomwait");
+
        kernel_map->pmap = pmap_kernel();
        callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
        kmem_map->pmap = pmap_kernel();
@@ -460,31 +468,10 @@
 vm_map_starved_p(struct vm_map *map)
 {
 
-       return false;
-}
+       if (map->flags & VM_MAP_WANTVA)
+               return true;
 
-void
-uvm_pageout_start(int npages)
-{
-
-       uvmexp.paging += npages;
-}
-
-void
-uvm_pageout_done(int npages)
-{
-
-       uvmexp.paging -= npages;
-
-       /*
-        * wake up either of pagedaemon or LWPs waiting for it.
-        */
-
-       if (uvmexp.free <= uvmexp.reserve_kernel) {
-               wakeup(&uvm.pagedaemon);
-       } else {
-               wakeup(&uvmexp.free);
-       }
+       return false;
 }
 
 int
@@ -582,14 +569,15 @@
 uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
 {
 
-       return (vaddr_t)rumpuser_malloc(PAGE_SIZE, PAGE_SIZE);
+       return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
+           waitok, "kmalloc");
 }
 
 void
 uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
 {
 
-       rumpuser_unmap((void *)addr, PAGE_SIZE);
+       rumpuser_free((void *)addr);
 }
 
 vaddr_t
@@ -648,13 +636,6 @@
 }
 
 void
-uvm_wait(const char *msg)
-{
-
-       /* nothing to wait for */
-}
-
-void
 uvmspace_free(struct vmspace *vm)
 {
 
@@ -703,3 +684,129 @@
 
        /* nada */
 }
+
+/*
+ * Routines related to the Page Baroness.
+ */
+
+void
+uvm_wait(const char *msg)
+{
+
+       if (__predict_false(curlwp == uvm.pagedaemon_lwp))
+               panic("pagedaemon out of memory");
+       if (__predict_false(rump_threads == 0))
+               panic("pagedaemon missing (RUMP_THREADS = 0)");
+
+       mutex_enter(&pdaemonmtx);
+       pdaemon_waiters++;
+       cv_signal(&pdaemoncv);
+       cv_wait(&oomwait, &pdaemonmtx);
+       mutex_exit(&pdaemonmtx);
+}
+
+void
+uvm_pageout_start(int npages)
+{
+
+       /* we don't have the heuristics */
+}
+
+void
+uvm_pageout_done(int npages)
+{
+
+       /* could wakeup waiters, but just let the pagedaemon do it */
+}
+
+/*
+ * Under-construction page mistress.  This is lacking vfs support, namely:
+ *
+ *  1) draining vfs buffers
+ *  2) paging out pages in vm vnode objects
+ *     (we will not page out anon memory on the basis that
+ *     that's the task of the host)
+ */
+
+void
+uvm_pageout(void *arg)
+{
+       struct pool *pp, *pp_first;
+       uint64_t where;
+       int timo = 0;
+       bool succ;
+
+       mutex_enter(&pdaemonmtx);
+       for (;;) {
+               cv_timedwait(&pdaemoncv, &pdaemonmtx, timo);
+               uvmexp.pdwoke++;
+               kernel_map->flags |= VM_MAP_WANTVA;
+               mutex_exit(&pdaemonmtx);
+
+               succ = false;
+               pool_drain_start(&pp_first, &where);
+               pp = pp_first;
+               for (;;) {
+                       succ = pool_drain_end(pp, where);
+                       if (succ)
+                               break;
+                       pool_drain_start(&pp, &where);
+                       if (pp == pp_first) {
+                               succ = pool_drain_end(pp, where);
+                               break;
+                       }
+               }
+               mutex_enter(&pdaemonmtx);
+
+               if (!succ) {
+                       rumpuser_dprintf("pagedaemoness: failed to reclaim "
+                           "memory ... sleeping (deadlock?)\n");
+                       timo = hz;
+                       continue;
+               }
+               kernel_map->flags &= ~VM_MAP_WANTVA;
+               timo = 0;
+
+               if (pdaemon_waiters) {
+                       pdaemon_waiters = 0;
+                       cv_broadcast(&oomwait);
+               }
+       }
+
+       panic("you can swap out any time you like, but you can never leave");
+}
+
+/*
+ * In a regular kernel the pagedaemon is activated when memory becomes
+ * low.  In a virtual rump kernel we do not know exactly how much memory
+ * we have available -- it depends on the conditions on the host.
+ * Therefore, we cannot preemptively kick the pagedaemon.  Rather, we
+ * wait until things are desperate and we're forced to uvm_wait().
+ *
+ * The alternative would be to allocate a huge chunk of memory at
+ * startup, but that solution has a number of problems including
+ * being a resource hog, failing anyway due to host memory overcommit
+ * and core dump size.
+ */
+
+void
+uvm_kick_pdaemon()
+{
+
+       /* nada */
+}
+
+void *
+rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
+{
+       void *rv;
+
+ again:
+       rv = rumpuser_malloc(howmuch, alignment);
+       if (__predict_false(rv == NULL && waitok)) {
+               uvm_wait(wmsg);
+               goto again;
+       }
+
+       return rv;
+}