On Sun, Jan 18, 2015 at 11:12:47PM -0500, Ted Unangst wrote:
> On Mon, Jan 19, 2015 at 11:28, David Gwynne wrote:
>
> >> if you're interested in seeing the effect of freeing at different
> >> intervals, you could try the diff below. it adds kern.pool.wait_free
> >> and kern.pool.wait_gc tunables so you can set how long a page has
> >> to be idle before a pool_put and the gc respectively will select a
> >> page for freeing.
> >>
> >> id prefer to pick values that Just Work(tm) all the time, but being
> >> able to test different values easily might be useful in the short
> >> term.
> >
> > ive been running the diff below in production for nearly a month now
> > without issue.
> >
> > anyone want to ok this? if i commit this would anyone object?
>
> I thought the sysctl code was ok for testing, but not for commit. I
> don't think we should be adding more knobs like this. A diff without
> that would be much more attractive.
like this one?
Index: subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.178
diff -u -p -r1.178 subr_pool.c
--- subr_pool.c 19 Jan 2015 03:57:22 -0000 1.178
+++ subr_pool.c 19 Jan 2015 06:38:54 -0000
@@ -40,6 +40,8 @@
#include <sys/syslog.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
+#include <sys/task.h>
+#include <sys/timeout.h>
#include <uvm/uvm_extern.h>
@@ -160,6 +162,14 @@ void pool_print1(struct pool *, const c
#define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)
+/* stale page garbage collectors */
+void pool_gc_sched(void *);
+struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
+void pool_gc_pages(void *, void *);
+struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL, NULL);
+int pool_wait_free = 1;
+int pool_wait_gc = 8;
+
static inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
@@ -681,7 +691,7 @@ pool_put(struct pool *pp, void *v)
/* is it time to free a page? */
if (pp->pr_nidle > pp->pr_maxpages &&
(ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
- (ticks - ph->ph_tick) > hz) {
+ (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
freeph = ph;
pool_p_remove(pp, freeph);
}
@@ -1338,6 +1348,47 @@ done:
rw_exit_read(&pool_lock);
return (rv);
+}
+
+void
+pool_gc_sched(void *null)
+{
+ task_add(systqmp, &pool_gc_task);
+}
+
+void
+pool_gc_pages(void *null, void *x)
+{
+ extern int ticks;
+ struct pool *pp;
+ struct pool_item_header *ph, *freeph;
+ int s;
+
+ rw_enter_read(&pool_lock);
+ s = splvm();
+ SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
+ if (pp->pr_nidle <= pp->pr_minpages || /* guess */
+ !mtx_enter_try(&pp->pr_mtx)) /* try */
+ continue;
+
+ /* is it time to free a page? */
+ if (pp->pr_nidle > pp->pr_minpages &&
+ (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
+ (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
+ freeph = ph;
+ pool_p_remove(pp, freeph);
+ } else
+ freeph = NULL;
+
+ mtx_leave(&pp->pr_mtx);
+
+ if (freeph != NULL)
+ pool_p_free(pp, freeph);
+ }
+ splx(s); /* XXX */
+ rw_exit_read(&pool_lock);
+
+ timeout_add_sec(&pool_gc_tick, 1);
}
/*