> On 23 Dec 2014, at 11:38, David Gwynne <da...@gwynne.id.au> wrote:
> 
> On Mon, Dec 22, 2014 at 10:54:16AM -0500, Ted Unangst wrote:
>> On Mon, Dec 22, 2014 at 14:59, Mike Belopuhov wrote:
>>> 1) how is it different from what we have now?
>>> 
>>> 2) why can't you do it when you pool_put?
>> 
>> Right now, if you allocate a bunch of things, then free them, the
>> pages won't be freed because the timestamp is new. But you've already
>> freed everything, so there won't be a future pool_put to trigger the
>> final page freeing. This keeps pages from getting stuck.
>> 
>>> 3) why can't you call it from uvm when there's memory pressure since
>>> from what i understand pool_reclaim was supposed to work like that?
>> 
>> Calling reclaim only when we're low on memory is sometimes too late.
> 
> and we never do. reclaim is only called when sysctl kern.pool_debug
> is fiddled with.
> 
>> 
>>> 4) i assume you don't want to call pool_reclaim from pool_gc_pages
>>> because of the mutex_enter_try benefits, but why does logic in
>>> these functions differ, e.g. why did you omit the pr_itemsperpage
>>> bit?
>> 
>> We're not trying to release all free pages. Only the ones that are both
>> too many and too old. This is the same logic that is already in
>> pool_put.
>> 
>>> 7) it looks like pool_reclaim_all should also raise an IPL since it
>>> does the same thing.  wasn't it noteced before?
>> 
>> Likely. I don't think reclaim_all gets called very often.
> 
> or at all, really.
> 
> mikeb, id really appreciate it if you could benchmark with this diff.
> 
> if you're interested in seeing the effect of freeing at different
> intervals, you could try the diff below. it adds kern.pool.wait_free
> and kern.pool.wait_gc tunables so you can set how long a page has
> to be idle before a pool_put and the gc respectively will select a
> page for freeing.
> 
> id prefer to pick values that Just Work(tm) all the time, but being
> able to test different values easily might be useful in the short
> term.

ive been running the diff below in production for nearly a month now without 
issue.

anyone want to ok this? if i commit this would anyone object?

dlg

> 
> Index: sbin/sysctl/sysctl.c
> ===================================================================
> RCS file: /cvs/src/sbin/sysctl/sysctl.c,v
> retrieving revision 1.207
> diff -u -p -r1.207 sysctl.c
> --- sbin/sysctl/sysctl.c      19 Nov 2014 18:04:54 -0000      1.207
> +++ sbin/sysctl/sysctl.c      22 Dec 2014 23:44:52 -0000
> @@ -44,6 +44,7 @@
> #include <sys/sched.h>
> #include <sys/sensors.h>
> #include <sys/vmmeter.h>
> +#include <sys/pool.h>
> #include <net/route.h>
> #include <net/if.h>
> 
> @@ -125,6 +126,7 @@ struct ctlname semname[] = CTL_KERN_SEMI
> struct ctlname shmname[] = CTL_KERN_SHMINFO_NAMES;
> struct ctlname watchdogname[] = CTL_KERN_WATCHDOG_NAMES;
> struct ctlname tcname[] = CTL_KERN_TIMECOUNTER_NAMES;
> +struct ctlname poolname[] = CTL_KERN_POOL_NAMES;
> struct ctlname *vfsname;
> #ifdef CTL_MACHDEP_NAMES
> struct ctlname machdepname[] = CTL_MACHDEP_NAMES;
> @@ -207,6 +209,7 @@ int sysctl_seminfo(char *, char **, int 
> int sysctl_shminfo(char *, char **, int *, int, int *);
> int sysctl_watchdog(char *, char **, int *, int, int *);
> int sysctl_tc(char *, char **, int *, int, int *);
> +int sysctl_pool(char *, char **, int *, int, int *);
> int sysctl_sensors(char *, char **, int *, int, int *);
> void print_sensordev(char *, int *, u_int, struct sensordev *);
> void print_sensor(struct sensor *);
> @@ -388,6 +391,11 @@ parse(char *string, int flags)
>                               return;
>                       warnx("use dmesg to view %s", string);
>                       return;
> +             case KERN_POOL:
> +                     len = sysctl_pool(string, &bufp, mib, flags, &type);
> +                     if (len < 0)
> +                             return;
> +                     break;
>               case KERN_PROC:
>                       if (flags == 0)
>                               return;
> @@ -1633,6 +1641,7 @@ struct list semlist = { semname, KERN_SE
> struct list shmlist = { shmname, KERN_SHMINFO_MAXID };
> struct list watchdoglist = { watchdogname, KERN_WATCHDOG_MAXID };
> struct list tclist = { tcname, KERN_TIMECOUNTER_MAXID };
> +struct list poollist = { poolname, KERN_POOL_MAXID };
> 
> /*
>  * handle vfs namei cache statistics
> @@ -2302,6 +2311,25 @@ sysctl_tc(char *string, char **bufpp, in
>               return (-1);
>       mib[2] = indx;
>       *typep = tclist.list[indx].ctl_type;
> +     return (3);
> +}
> +
> +/*
> + * Handle pools support
> + */
> +int
> +sysctl_pool(char *string, char **bufpp, int mib[], int flags, int *typep)
> +{
> +     int indx;
> +
> +     if (*bufpp == NULL) {
> +             listall(string, &poollist);
> +             return (-1);
> +     }
> +     if ((indx = findname(string, "third", bufpp, &poollist)) == -1)
> +             return (-1);
> +     mib[2] = indx;
> +     *typep = poollist.list[indx].ctl_type;
>       return (3);
> }
> 
> Index: sys/kern/init_main.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/init_main.c,v
> retrieving revision 1.227
> diff -u -p -r1.227 init_main.c
> --- sys/kern/init_main.c      15 Dec 2014 02:24:23 -0000      1.227
> +++ sys/kern/init_main.c      22 Dec 2014 23:44:55 -0000
> @@ -121,6 +121,8 @@ extern struct timeout setperf_to;
> void setperf_auto(void *);
> #endif
> 
> +extern struct timeout pool_gc_tick;
> +
> int   cmask = CMASK;
> extern        struct user *proc0paddr;
> 
> @@ -554,6 +556,11 @@ main(void *framep)
> #ifndef SMALL_KERNEL
>       timeout_set(&setperf_to, setperf_auto, NULL);
> #endif
> +
> +     /*
> +      * Start the idle pool page garbage collector
> +      */
> +     timeout_add_sec(&pool_gc_tick, 1);
> 
>         /*
>          * proc0: nothing to do, back to sleep
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.277
> diff -u -p -r1.277 kern_sysctl.c
> --- sys/kern/kern_sysctl.c    12 Dec 2014 07:45:46 -0000      1.277
> +++ sys/kern/kern_sysctl.c    22 Dec 2014 23:44:55 -0000
> @@ -496,7 +496,8 @@ kern_sysctl(int *name, u_int namelen, vo
>       case KERN_NPROCS:
>               return (sysctl_rdint(oldp, oldlenp, newp, nprocesses));
>       case KERN_POOL:
> -             return (sysctl_dopool(name + 1, namelen - 1, oldp, oldlenp));
> +             return (sysctl_pool(name + 1, namelen - 1, oldp, oldlenp,
> +                 newp, newlen));
>       case KERN_STACKGAPRANDOM:
>               stackgap = stackgap_random;
>               error = sysctl_int(oldp, oldlenp, newp, newlen, &stackgap);
> Index: sys/kern/subr_pool.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/subr_pool.c,v
> retrieving revision 1.173
> diff -u -p -r1.173 subr_pool.c
> --- sys/kern/subr_pool.c      22 Dec 2014 00:33:40 -0000      1.173
> +++ sys/kern/subr_pool.c      22 Dec 2014 23:44:55 -0000
> @@ -40,6 +40,8 @@
> #include <sys/syslog.h>
> #include <sys/rwlock.h>
> #include <sys/sysctl.h>
> +#include <sys/task.h>
> +#include <sys/timeout.h>
> 
> #include <uvm/uvm_extern.h>
> 
> @@ -160,6 +162,14 @@ void      pool_print1(struct pool *, const c
> 
> #define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)
> 
> +/* stale page garbage collectors */
> +void pool_gc_sched(void *);
> +struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
> +void pool_gc_pages(void *, void *);
> +struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL, NULL);
> +int pool_wait_free = 1;
> +int pool_wait_gc = 8;
> +
> static inline int
> phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
> {
> @@ -678,7 +688,7 @@ pool_put(struct pool *pp, void *v)
>       /* is it time to free a page? */
>       if (pp->pr_nidle > pp->pr_maxpages &&
>           (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
> -         (ticks - ph->ph_tick) > hz) {
> +         (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
>               freeph = ph;
>               pool_p_remove(pp, freeph);
>       }
> @@ -1269,11 +1279,13 @@ pool_walk(struct pool *pp, int full,
>  * kern.pool.name.<pool#> - the name for pool#.
>  */
> int
> -sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
> +sysctl_pool(int *name, u_int namelen, char *oldp, size_t *oldlenp,
> +    void *newp, size_t newlen)
> {
>       struct kinfo_pool pi;
>       struct pool *pp;
>       int rv = ENOENT;
> +     int wait;
> 
>       switch (name[0]) {
>       case KERN_POOL_NPOOLS:
> @@ -1284,6 +1296,35 @@ sysctl_dopool(int *name, u_int namelen, 
>       case KERN_POOL_NAME:
>       case KERN_POOL_POOL:
>               break;
> +
> +     case KERN_POOL_WAITFREE:
> +             if (namelen != 1)
> +                     return (ENOTDIR);
> +             wait = pool_wait_free;
> +             rv = sysctl_int(oldp, oldlenp, newp, newlen, &wait);
> +             if (rv != 0)
> +                     return (rv);
> +             if (newp == NULL)
> +                     return (0);
> +             if (wait < 1 || wait >= pool_wait_gc)
> +                     return (EINVAL);
> +             pool_wait_free = wait;
> +             return (0);
> +
> +     case KERN_POOL_WAITGC:
> +             if (namelen != 1)
> +                     return (ENOTDIR);
> +             wait = pool_wait_gc;
> +             rv = sysctl_int(oldp, oldlenp, newp, newlen, &wait);
> +             if (rv != 0)
> +                     return (rv);
> +             if (newp == NULL)
> +                     return (0);
> +             if (wait <= pool_wait_free || wait > 3600)
> +                     return (EINVAL);
> +             pool_wait_gc = wait;
> +             return (0);
> +
>       default:
>               return (EOPNOTSUPP);
>       }
> @@ -1337,6 +1378,46 @@ done:
>       rw_exit_read(&pool_lock);
> 
>       return (rv);
> +}
> +
> +void
> +pool_gc_sched(void *x)
> +{
> +     task_add(systq, &pool_gc_task);
> +}
> +
> +void
> +pool_gc_pages(void *x, void *y)
> +{
> +     extern int ticks;
> +     struct pool *pp;
> +     struct pool_item_header *ph, *freeph;
> +     int s;
> +
> +     rw_enter_read(&pool_lock);
> +     s = splvm();
> +     SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
> +             if (!mtx_enter_try(&pp->pr_mtx))
> +                     continue;
> +
> +             /* is it time to free a page? */
> +             if (pp->pr_nidle > pp->pr_minpages &&
> +                 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
> +                 (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
> +                     freeph = ph;
> +                     pool_p_remove(pp, freeph);
> +             } else
> +                     freeph = NULL;
> +
> +             mtx_leave(&pp->pr_mtx);
> +
> +             if (freeph != NULL)
> +                     pool_p_free(pp, freeph);
> +     }
> +     splx(s); /* XXX */
> +     rw_exit_read(&pool_lock);
> +
> +     timeout_add_sec(&pool_gc_tick, 1);
> }
> 
> /*
> Index: sys/sys/pool.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/pool.h,v
> retrieving revision 1.55
> diff -u -p -r1.55 pool.h
> --- sys/sys/pool.h    19 Dec 2014 02:15:25 -0000      1.55
> +++ sys/sys/pool.h    22 Dec 2014 23:44:55 -0000
> @@ -39,10 +39,24 @@
>  * kern.pool.npools
>  * kern.pool.name.<number>
>  * kern.pool.pool.<number>
> + * kern.pool.wait_free
> + * kern.pool.wait_gc
>  */
> #define KERN_POOL_NPOOLS      1
> #define KERN_POOL_NAME                2
> #define KERN_POOL_POOL                3
> +#define KERN_POOL_WAITFREE   4
> +#define KERN_POOL_WAITGC     5
> +#define KERN_POOL_MAXID              6
> +
> +#define CTL_KERN_POOL_NAMES { \
> +     { 0, 0 }, \
> +     { "npools", CTLTYPE_INT }, \
> +     { "name", CTLTYPE_NODE }, \
> +     { "pool", CTLTYPE_NODE }, \
> +     { "wait_free", CTLTYPE_INT }, \
> +     { "wait_gc", CTLTYPE_INT } \
> +}
> 
> struct kinfo_pool {
>       unsigned int    pr_size;        /* size of a pool item */
> Index: sys/sys/sysctl.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/sysctl.h,v
> retrieving revision 1.153
> diff -u -p -r1.153 sysctl.h
> --- sys/sys/sysctl.h  12 Dec 2014 07:45:46 -0000      1.153
> +++ sys/sys/sysctl.h  22 Dec 2014 23:44:55 -0000
> @@ -945,7 +945,7 @@ int sysctl_vnode(char *, size_t *, struc
> #ifdef GPROF
> int sysctl_doprof(int *, u_int, void *, size_t *, void *, size_t);
> #endif
> -int sysctl_dopool(int *, u_int, char *, size_t *);
> +int sysctl_pool(int *, u_int, char *, size_t *, void *, size_t);
> 
> int kern_sysctl(int *, u_int, void *, size_t *, void *, size_t,
>                    struct proc *);


Reply via email to