Module Name:    src
Committed By:   pooka
Date:           Thu Jun  3 10:56:20 UTC 2010

Modified Files:
        src/sys/rump/librump/rumpkern: locks_up.c memalloc.c rump.c
            rump_private.h vm.c

Log Message:
Implement a sort-of pagedaemon: adjust all memory allocators to go
through an in-rumpkernel hypermemory allocator which knows it should
kick the pagedaemon and block in case ``waitok'' memory allocation
fails.

This allows us to recover from some out-of-memory situations.
Realistically speaking (as opposed to whatever "should be" in
theory), these OOM situations will happen extremely rarely, if ever,
when our hypervisor is a regular process.  Speculatively, this
should be useful for other types of hosts.

issues remaining:
 * the hypervisor does not know how to reclaim kernel memory (and
   for the reason I stated above, I'm not sure if it makes sense
   to teach the current implementation about that)
 * vfs memory (buffers, vm object pages etc.) is not reclaimed


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/rump/librump/rumpkern/locks_up.c
cvs rdiff -u -r1.6 -r1.7 src/sys/rump/librump/rumpkern/memalloc.c
cvs rdiff -u -r1.174 -r1.175 src/sys/rump/librump/rumpkern/rump.c
cvs rdiff -u -r1.49 -r1.50 src/sys/rump/librump/rumpkern/rump_private.h
cvs rdiff -u -r1.79 -r1.80 src/sys/rump/librump/rumpkern/vm.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/rump/librump/rumpkern/locks_up.c
diff -u src/sys/rump/librump/rumpkern/locks_up.c:1.2 src/sys/rump/librump/rumpkern/locks_up.c:1.3
--- src/sys/rump/librump/rumpkern/locks_up.c:1.2	Tue Jun  1 20:11:33 2010
+++ src/sys/rump/librump/rumpkern/locks_up.c	Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: locks_up.c,v 1.2 2010/06/01 20:11:33 pooka Exp $	*/
+/*	$NetBSD: locks_up.c,v 1.3 2010/06/03 10:56:20 pooka Exp $	*/
 
 /*
  * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: locks_up.c,v 1.2 2010/06/01 20:11:33 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locks_up.c,v 1.3 2010/06/03 10:56:20 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -74,7 +74,7 @@
 	 * XXX: pool_cache would be nice, but not easily possible,
 	 * as pool cache init wants to call mutex_init() ...
 	 */
-	upm = rumpuser_malloc(sizeof(*upm), 0);
+	upm = rump_hypermalloc(sizeof(*upm), 0, true, "mutex_init");
 	memset(upm, 0, sizeof(*upm));
 	rumpuser_cv_init(&upm->upm_rucv);
 	memcpy(mtx, &upm, sizeof(void *));
@@ -178,7 +178,7 @@
 	CTASSERT(sizeof(krwlock_t) >= sizeof(void *));
 	checkncpu();
 
-	uprw = rumpuser_malloc(sizeof(*uprw), 0);
+	uprw = rump_hypermalloc(sizeof(*uprw), 0, true, "rwinit");
 	memset(uprw, 0, sizeof(*uprw));
 	rumpuser_cv_init(&uprw->uprw_rucv_reader);
 	rumpuser_cv_init(&uprw->uprw_rucv_writer);

Index: src/sys/rump/librump/rumpkern/memalloc.c
diff -u src/sys/rump/librump/rumpkern/memalloc.c:1.6 src/sys/rump/librump/rumpkern/memalloc.c:1.7
--- src/sys/rump/librump/rumpkern/memalloc.c:1.6	Tue Jun  1 20:11:33 2010
+++ src/sys/rump/librump/rumpkern/memalloc.c	Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: memalloc.c,v 1.6 2010/06/01 20:11:33 pooka Exp $	*/
+/*	$NetBSD: memalloc.c,v 1.7 2010/06/03 10:56:20 pooka Exp $	*/
 
 /*
  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: memalloc.c,v 1.6 2010/06/01 20:11:33 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: memalloc.c,v 1.7 2010/06/03 10:56:20 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/kmem.h>
@@ -73,7 +73,7 @@
 {
 	void *rv;
 
-	rv = rumpuser_malloc(size, 0);
+	rv = rump_hypermalloc(size, 0, (flags & M_WAITOK) != 0, "malloc");
 	if (rv && flags & M_ZERO)
 		memset(rv, 0, size);
 
@@ -110,7 +110,7 @@
 kmem_alloc(size_t size, km_flag_t kmflag)
 {
 
-	return rumpuser_malloc(size, 0);
+	return rump_hypermalloc(size, 0, kmflag == KM_SLEEP, "kmem_alloc");
 }
 
 void *
@@ -240,18 +240,14 @@
 void *
 pool_get(struct pool *pp, int flags)
 {
-	void *rv;
 
 #ifdef DIAGNOSTIC
 	if (pp->pr_size == 0)
 		panic("%s: pool unit size 0.  not initialized?", __func__);
 #endif
 
-	rv = rumpuser_malloc(pp->pr_size, pp->pr_align);
-	if (rv == NULL && (flags & PR_WAITOK && (flags & PR_LIMITFAIL) == 0))
-		panic("%s: out of memory and PR_WAITOK", __func__);
-
-	return rv;
+	return rump_hypermalloc(pp->pr_size, pp->pr_align,
+	    (flags & PR_WAITOK) != 0, "pget");	/* PR_WAITOK: may sleep */
 }
 
 void
@@ -299,6 +295,20 @@
 	pc->pc_pool.pr_drain_hook_arg = arg;
 }
 
+/*
+ * Begin a pool drain pass.  pool_get() in this file allocates straight
+ * from rump_hypermalloc(), so there is no pool to pick and nothing to
+ * start.  The out-parameters are still given defined values, because
+ * uvm_pageout() reads them back and passes them to pool_drain_end().
+ */
+void
+pool_drain_start(struct pool **ppp, uint64_t *wp)
+{
+
+	*ppp = NULL;
+	*wp = 0;
+}
+
+/*
+ * Finish a pool drain pass and report whether any memory was reclaimed.
+ * uvm_pageout() stores the result in a bool, so a value must be
+ * returned; this stub does no reclaiming and therefore always reports
+ * false.  Both arguments are ignored.
+ */
+bool
+pool_drain_end(struct pool *pp, uint64_t w)
+{
+
+	/* nada again: nothing was reclaimed */
+	return false;
+}
+
 int
 pool_prime(struct pool *pp, int nitems)
 {

Index: src/sys/rump/librump/rumpkern/rump.c
diff -u src/sys/rump/librump/rumpkern/rump.c:1.174 src/sys/rump/librump/rumpkern/rump.c:1.175
--- src/sys/rump/librump/rumpkern/rump.c:1.174	Wed Jun  2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/rump.c	Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: rump.c,v 1.174 2010/06/02 10:55:18 pooka Exp $	*/
+/*	$NetBSD: rump.c,v 1.175 2010/06/03 10:56:20 pooka Exp $	*/
 
 /*
  * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
@@ -28,7 +28,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.174 2010/06/02 10:55:18 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.175 2010/06/03 10:56:20 pooka Exp $");
 
 #include <sys/systm.h>
 #define ELFSIZE ARCH_ELFSIZE
@@ -49,6 +49,7 @@
 #include <sys/kernel.h>
 #include <sys/kmem.h>
 #include <sys/kprintf.h>
+#include <sys/kthread.h>
 #include <sys/ksyms.h>
 #include <sys/msgbuf.h>
 #include <sys/module.h>
@@ -357,6 +358,15 @@
 	pipe_init();
 	resource_init();
 
+	/* start page baroness */
+	if (rump_threads) {
+		if (kthread_create(PRI_PGDAEMON, KTHREAD_MPSAFE, NULL,
+		    uvm_pageout, NULL, &uvm.pagedaemon_lwp, "pdaemon") != 0)
+			panic("pagedaemon create failed");
+	} else
+		uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
+
+	/* process dso's */
 	rumpuser_dl_bootstrap(add_linkedin_modules, rump_kernelfsym_load);
 
 	/* these do nothing if not present */

Index: src/sys/rump/librump/rumpkern/rump_private.h
diff -u src/sys/rump/librump/rumpkern/rump_private.h:1.49 src/sys/rump/librump/rumpkern/rump_private.h:1.50
--- src/sys/rump/librump/rumpkern/rump_private.h:1.49	Wed Jun  2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/rump_private.h	Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: rump_private.h,v 1.49 2010/06/02 10:55:18 pooka Exp $	*/
+/*	$NetBSD: rump_private.h,v 1.50 2010/06/03 10:56:20 pooka Exp $	*/
 
 /*
  * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
@@ -129,4 +129,6 @@
 void	rump_intr_init(void);
 void	rump_softint_run(struct cpu_info *);
 
+void	*rump_hypermalloc(size_t, int, bool, const char *);
+
 #endif /* _SYS_RUMP_PRIVATE_H_ */

Index: src/sys/rump/librump/rumpkern/vm.c
diff -u src/sys/rump/librump/rumpkern/vm.c:1.79 src/sys/rump/librump/rumpkern/vm.c:1.80
--- src/sys/rump/librump/rumpkern/vm.c:1.79	Wed Jun  2 10:55:18 2010
+++ src/sys/rump/librump/rumpkern/vm.c	Thu Jun  3 10:56:20 2010
@@ -1,4 +1,4 @@
-/*	$NetBSD: vm.c,v 1.79 2010/06/02 10:55:18 pooka Exp $	*/
+/*	$NetBSD: vm.c,v 1.80 2010/06/03 10:56:20 pooka Exp $	*/
 
 /*
  * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
@@ -43,15 +43,16 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.79 2010/06/02 10:55:18 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.80 2010/06/03 10:56:20 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
+#include <sys/buf.h>
+#include <sys/kernel.h>
 #include <sys/kmem.h>
 #include <sys/mman.h>
 #include <sys/null.h>
 #include <sys/vnode.h>
-#include <sys/buf.h>
 
 #include <machine/pmap.h>
 
@@ -87,6 +88,10 @@
 static struct vm_map_kernel kernel_map_store;
 struct vm_map *kernel_map = &kernel_map_store.vmk_map;
 
+static unsigned int pdaemon_waiters;
+static kmutex_t pdaemonmtx;
+static kcondvar_t pdaemoncv, oomwait;
+
 /*
  * vm pages 
  */
@@ -233,12 +238,15 @@
 {
 
 	uvmexp.free = 1024*1024; /* XXX */
-	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
 	rump_vmspace.vm_map.pmap = pmap_kernel();
 
 	mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
 	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
 
+	mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0);
+	cv_init(&pdaemoncv, "pdaemon");
+	cv_init(&oomwait, "oomwait");
+
 	kernel_map->pmap = pmap_kernel();
 	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
 	kmem_map->pmap = pmap_kernel();
@@ -460,31 +468,10 @@
 vm_map_starved_p(struct vm_map *map)
 {
 
-	return false;
-}
+	if (map->flags & VM_MAP_WANTVA)
+		return true;
 
-void
-uvm_pageout_start(int npages)
-{
-
-	uvmexp.paging += npages;
-}
-
-void
-uvm_pageout_done(int npages)
-{
-
-	uvmexp.paging -= npages;
-
-	/*
-	 * wake up either of pagedaemon or LWPs waiting for it.
-	 */
-
-	if (uvmexp.free <= uvmexp.reserve_kernel) {
-		wakeup(&uvm.pagedaemon);
-	} else {
-		wakeup(&uvmexp.free);
-	}
+	return false;
 }
 
 int
@@ -582,14 +569,15 @@
 uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
 {
 
-	return (vaddr_t)rumpuser_malloc(PAGE_SIZE, PAGE_SIZE);
+	return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
+	    waitok, "kmalloc");
 }
 
 void
 uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
 {
 
-	rumpuser_unmap((void *)addr, PAGE_SIZE);
+	rumpuser_free((void *)addr);	/* page came from rumpuser_malloc() */
 }
 
 vaddr_t
@@ -648,13 +636,6 @@
 }
 
 void
-uvm_wait(const char *msg)
-{
-
-	/* nothing to wait for */
-}
-
-void
 uvmspace_free(struct vmspace *vm)
 {
 
@@ -703,3 +684,129 @@
 
 	/* nada */
 }
+
+/*
+ * Routines related to the Page Baroness.
+ */
+
+/*
+ * Sleep until the pagedaemon announces that it managed to reclaim
+ * memory.  The caller is counted in pdaemon_waiters, uvm_pageout() is
+ * prodded through pdaemoncv, and the caller then blocks on oomwait.
+ * The msg argument is not used.
+ *
+ * Panics if called by the pagedaemon itself, or if the rump kernel
+ * runs without threads (in which case no pagedaemon was created).
+ */
+void
+uvm_wait(const char *msg)
+{
+
+	if (__predict_false(curlwp == uvm.pagedaemon_lwp)) {
+		panic("pagedaemon out of memory");
+	}
+	if (__predict_false(!rump_threads)) {
+		panic("pagedaemon missing (RUMP_THREADS = 0)");
+	}
+
+	mutex_enter(&pdaemonmtx);
+	pdaemon_waiters += 1;
+	cv_signal(&pdaemoncv);		/* prod uvm_pageout() */
+	cv_wait(&oomwait, &pdaemonmtx);	/* woken by its cv_broadcast() */
+	mutex_exit(&pdaemonmtx);
+}
+
+void
+uvm_pageout_start(int npages)
+{
+
+	/*
+	 * Intentionally empty: npages is ignored, since we don't have
+	 * the paging heuristics that this count would feed.
+	 */
+}
+
+void
+uvm_pageout_done(int npages)
+{
+
+	/*
+	 * Intentionally empty: npages is ignored.  We could wake up
+	 * waiters here, but just let the pagedaemon do it (uvm_pageout()
+	 * broadcasts on oomwait once it has reclaimed memory).
+	 */
+}
+
+/*
+ * Under-construction page mistress.  This is lacking vfs support, namely:
+ *
+ *  1) draining vfs buffers
+ *  2) paging out pages in vm vnode objects
+ *     (we will not page out anon memory on the basis that
+ *     that's the task of the host)
+ *
+ * Thread body of the pagedaemon; arg is unused and the function never
+ * returns.  Each round it sleeps on pdaemoncv, flags kernel_map with
+ * VM_MAP_WANTVA (which makes vm_map_starved_p() report true), and then
+ * walks the pools with pool_drain_start()/pool_drain_end() until one
+ * drain reports success or the walk is back at the first pool.
+ *
+ * On success the flag is cleared and every thread sleeping in
+ * uvm_wait() is woken.  On failure a diagnostic is printed and the next
+ * sleep is limited to hz ticks so that the drain is retried.
+ */
+
+void
+uvm_pageout(void *arg)
+{
+	struct pool *pp, *pp_first;
+	uint64_t where;
+	int timo = 0;		/* 0: no timeout, see condvar(9) */
+	bool succ;
+
+	mutex_enter(&pdaemonmtx);
+	for (;;) {
+		cv_timedwait(&pdaemoncv, &pdaemonmtx, timo);
+		uvmexp.pdwoke++;
+		kernel_map->flags |= VM_MAP_WANTVA;
+		mutex_exit(&pdaemonmtx);	/* drain without the lock */
+
+		succ = false;
+		pool_drain_start(&pp_first, &where);
+		pp = pp_first;
+		for (;;) {
+			succ = pool_drain_end(pp, where);
+			if (succ)
+				break;
+			pool_drain_start(&pp, &where);
+			if (pp == pp_first) {	/* wrapped around */
+				succ = pool_drain_end(pp, where);
+				break;
+			}
+		}
+		mutex_enter(&pdaemonmtx);
+
+		if (!succ) {
+			rumpuser_dprintf("pagedaemoness: failed to reclaim "
+			    "memory ... sleeping (deadlock?)\n");
+			timo = hz;	/* retry after hz ticks */
+			continue;	/* flag stays set, waiters stay asleep */
+		}
+		kernel_map->flags &= ~VM_MAP_WANTVA;
+		timo = 0;
+
+		if (pdaemon_waiters) {
+			pdaemon_waiters = 0;
+			cv_broadcast(&oomwait);	/* release uvm_wait() sleepers */
+		}
+	}
+
+	panic("you can swap out any time you like, but you can never leave");
+}
+
+/*
+ * In a regular kernel the pagedaemon is activated when memory becomes
+ * low.  In a virtual rump kernel we do not know exactly how much memory
+ * we have available -- it depends on the conditions on the host.
+ * Therefore, we cannot preemptively kick the pagedaemon.  Rather, we
+ * wait until things are desperate and we're forced to uvm_wait().
+ *
+ * The alternative would be to allocate a huge chunk of memory at
+ * startup, but that solution has a number of problems including
+ * being a resource hog, failing anyway due to host memory overcommit
+ * and core dump size.
+ */
+
+void
+uvm_kick_pdaemon(void)
+{
+
+	/* nada: the pagedaemon is woken from uvm_wait() instead */
+}
+
+/*
+ * Allocate howmuch bytes with the requested alignment through
+ * rumpuser_malloc().  If that fails and waitok is set, sleep in
+ * uvm_wait() (wmsg is passed along) and try again, repeating until
+ * the allocation succeeds.  Without waitok the result of the first
+ * attempt is returned as is, which may be NULL.
+ */
+void *
+rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
+{
+	void *mem;
+
+	mem = rumpuser_malloc(howmuch, alignment);
+	while (__predict_false(mem == NULL && waitok)) {
+		/* out of memory: let the pagedaemon work, then retry */
+		uvm_wait(wmsg);
+		mem = rumpuser_malloc(howmuch, alignment);
+	}
+
+	return mem;
+}

Reply via email to