> Date: Fri, 11 Dec 2020 11:51:54 -0600
> From: Scott Cheloha <[email protected]>
> 
> On Fri, Dec 11, 2020 at 09:49:07AM -0300, Martin Pieuchot wrote:
> > On 11/12/20(Fri) 12:52, Mark Kettenis wrote:
> > > > Date: Thu, 10 Dec 2020 16:13:22 -0600
> > > > From: Scott Cheloha <[email protected]>
> > > > 
> > > > Hi,
> > > > 
> > > > We looked at removing the ticks from subr_pool.c a while back but it
> > > > got shelved.  That may or may not have been my fault.  I don't
> > > > remember.
> > > > 
> > > > Anyway, I would normally suggest switching to getuptime(9) here, but
> > > > getuptime(9) counts in seconds and we're working with a 1 second
> > > > timeout in this code (pool_wait_free) so that's too coarse an
> > > > interface for this job.
> > > > 
> > > > The next best thing I could come up with was introducing a coarse
> > > > sub-second interface for use in this file, "getnsecuptime()", which
> > > > calls getnanouptime(9) and converts the result to a 64-bit count of
> > > > nanoseconds.  This is relatively fast (we don't read the underlying
> > > > timecounter hardware) and causes a minimal amount of code change (we
> > > > can use it inline because it returns an integral value).
> > > > 
> > > > >From there the changes are simple:
> > > > 
> > > > - Renames: ph_tick -> ph_timestamp, pr_cache_tick -> pr_cache_timestamp
> > > > 
> > > > - Call getnsecuptime(9) wherever we read 'ticks'.
> > > > 
> > > > - Change pool_wait_gc and pool_wait_free to counts of nanoseconds.
> > > >   They could be macros, e.g.
> > > > 
> > > > #define POOL_WAIT_GC    8000000000ULL
> > > > 
> > > >   but I'll leave that for a second diff to keep things simple.
> > > > 
> > > > This compiles and I haven't changed any logic so I assume it isn't
> > > > broken.
> > > > 
> > > > We could move getnsecuptime() into kern_tc.c but it isn't used
> > > > anywhere else yet so I'm hesitant to do so.
> > > > 
> > > > Thoughts?
> > > 
> > > Specifying the timeouts in nanoseconds isn't particularly useful I'd
> > > say.  But I see we can't use SEC_TO_NSEC here because of the overflow
> > > check...
> > 
> > I'm not sure to understand, can't we do:
> > 
> >     pool_wait_free = SEC_TO_NSEC(1);
> >     pool_wait_gc = SEC_TO_NSEC(8);
> > 
> > or are you pointing at something else?
> 
> We can do that at runtime but not at compile time.  SEC_TO_NSEC(1)
> isn't a constant so that won't compile (I just tried).
> 
> We _could_ do something like this:
> 
> #define POOL_WAIT_FREE        SEC_TO_NSEC(1)
> 
> I think the compiler will probably inline the result and elide the
> overflow check because the input is a constant.  I don't know how to
> verify this, but my limited understanding of compilers suggests that
> this is totally possible.

Yes.  The consequence of that is that the values are no longer
patchable.  That may not be very important though (I never really use
that possibility).

> > One comment below
> > 
> > [...]
> >
> > > > Index: sys/pool.h
> > > > ===================================================================
> > > > RCS file: /cvs/src/sys/sys/pool.h,v
> > > > retrieving revision 1.77
> > > > diff -u -p -r1.77 pool.h
> > > > --- sys/pool.h  19 Jul 2019 09:03:03 -0000      1.77
> > > > +++ sys/pool.h  10 Dec 2020 22:08:33 -0000
> > > > @@ -201,7 +201,7 @@ struct pool {
> > > >         u_int           pr_cache_items; /* target list length */
> > > >         u_int           pr_cache_contention;
> > > >         u_int           pr_cache_contention_prev;
> > > > -       int             pr_cache_tick;  /* time idle list was empty */
> > > > +       uint64_t        pr_cache_timestamp;     /* when idle list was 
> > > > empty */
> > > >         int             pr_cache_nout;
> > 
> > Do you see a change in of size in the struct?  If so, does moving
> > `pr_cache_nout' after `pr_cache_ngc' helps?
> 
> Right, the structs.  We're changing both struct pool_page_header and
> struct pool.
> 
> For the unmodified (-current) binary on amd64:
> 
> $ gdb -q obj/bsd.gdb
> (gdb) p sizeof(struct pool_page_header)
> $1 = 112
> (gdb) p sizeof(struct pool)
> $2 = 424
> 
> For the patched binary on amd64:
> 
> (gdb) p sizeof(struct pool_page_header) 
> $1 = 112
> (gdb) p sizeof(struct pool)
> $2 = 432
> 
> kettenis@: The pool_page_header does not grow.  That's good, right?
> 
> If do as mpi@ suggested and we move pr_cache_nout after
> pr_cache_ngc...
> 
> $ gdb -q obj/bsd.gdb 
> (gdb) p sizeof(struct pool_page_header)
> $1 = 112
> (gdb) p sizeof(struct pool)
> $2 = 424
> 
> Bingo!  Same size.  mpi@, how did you know?
> 
> Patch attached with the struct shuffling and the new macros below.
> 
> Index: kern/subr_pool.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/subr_pool.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 subr_pool.c
> --- kern/subr_pool.c  24 Jan 2020 06:31:17 -0000      1.230
> +++ kern/subr_pool.c  11 Dec 2020 17:50:46 -0000
> @@ -41,6 +41,7 @@
>  #include <sys/syslog.h>
>  #include <sys/sysctl.h>
>  #include <sys/task.h>
> +#include <sys/time.h>
>  #include <sys/timeout.h>
>  #include <sys/percpu.h>
>  
> @@ -148,7 +149,7 @@ struct pool_page_header {
>       caddr_t                 ph_page;        /* this page's address */
>       caddr_t                 ph_colored;     /* page's colored address */
>       unsigned long           ph_magic;
> -     int                     ph_tick;
> +     uint64_t                ph_timestamp;
>  };
>  #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
>  #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
> @@ -266,8 +267,19 @@ void     pool_gc_sched(void *);
>  struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
>  void pool_gc_pages(void *);
>  struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
> -int pool_wait_free = 1;
> -int pool_wait_gc = 8;
> +
> +#define POOL_WAIT_FREE       SEC_TO_NSEC(1)
> +#define POOL_WAIT_GC SEC_TO_NSEC(8)
> +
> +/* XXX where do I put this? */
> +uint64_t
> +getnsecuptime(void)
> +{
> +     struct timespec now;
> +
> +     getnanouptime(&now);
> +     return TIMESPEC_TO_NSEC(&now);
> +}
>  
>  RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
>  
> @@ -797,7 +809,7 @@ pool_put(struct pool *pp, void *v)
>       /* is it time to free a page? */
>       if (pp->pr_nidle > pp->pr_maxpages &&
>           (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
> -         (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
> +         getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
>               freeph = ph;
>               pool_p_remove(pp, freeph);
>       }
> @@ -864,7 +876,7 @@ pool_do_put(struct pool *pp, void *v)
>                */
>               pp->pr_nidle++;
>  
> -             ph->ph_tick = ticks;
> +             ph->ph_timestamp = getnsecuptime();
>               TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
>               TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
>               pool_update_curpage(pp);
> @@ -1566,7 +1578,7 @@ pool_gc_pages(void *null)
>               /* is it time to free a page? */
>               if (pp->pr_nidle > pp->pr_minpages &&
>                   (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
> -                 (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
> +                 getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) {
>                       freeph = ph;
>                       pool_p_remove(pp, freeph);
>               } else
> @@ -1726,7 +1738,7 @@ pool_cache_init(struct pool *pp)
>       arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
>       TAILQ_INIT(&pp->pr_cache_lists);
>       pp->pr_cache_nitems = 0;
> -     pp->pr_cache_tick = ticks;
> +     pp->pr_cache_timestamp = getnsecuptime();
>       pp->pr_cache_items = 8;
>       pp->pr_cache_contention = 0;
>       pp->pr_cache_ngc = 0;
> @@ -1829,7 +1841,7 @@ pool_cache_list_free(struct pool *pp, st
>  {
>       pool_list_enter(pp);
>       if (TAILQ_EMPTY(&pp->pr_cache_lists))
> -             pp->pr_cache_tick = ticks;
> +             pp->pr_cache_timestamp = getnsecuptime();
>  
>       pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
>       TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
> @@ -2006,7 +2018,7 @@ pool_cache_gc(struct pool *pp)
>  {
>       unsigned int contention, delta;
>  
> -     if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
> +     if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC &&
>           !TAILQ_EMPTY(&pp->pr_cache_lists) &&
>           pl_enter_try(pp, &pp->pr_cache_lock)) {
>               struct pool_cache_item *pl = NULL;
> @@ -2015,7 +2027,7 @@ pool_cache_gc(struct pool *pp)
>               if (pl != NULL) {
>                       TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
>                       pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
> -                     pp->pr_cache_tick = ticks;
> +                     pp->pr_cache_timestamp = getnsecuptime();
>  
>                       pp->pr_cache_ngc++;
>               }
> Index: sys/pool.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/pool.h,v
> retrieving revision 1.77
> diff -u -p -r1.77 pool.h
> --- sys/pool.h        19 Jul 2019 09:03:03 -0000      1.77
> +++ sys/pool.h        11 Dec 2020 17:50:46 -0000
> @@ -201,9 +201,9 @@ struct pool {
>       u_int           pr_cache_items; /* target list length */
>       u_int           pr_cache_contention;
>       u_int           pr_cache_contention_prev;
> -     int             pr_cache_tick;  /* time idle list was empty */
> -     int             pr_cache_nout;
> +     uint64_t        pr_cache_timestamp;     /* time idle list was empty */
>       uint64_t        pr_cache_ngc;   /* # of times the gc released a list */
> +     int             pr_cache_nout;
>  
>       u_int           pr_align;
>       u_int           pr_maxcolors;   /* Cache coloring */
> 
> 

Reply via email to