Re: pool(9): remove ticks (attempt 2)

2020-12-27 Thread dlg



> On 24 Dec 2020, at 3:16 am, Scott Cheloha  wrote:
> 
> On Fri, Dec 11, 2020 at 05:32:54PM -0600, Scott Cheloha wrote:
>> On Fri, Dec 11, 2020 at 07:52:45PM +0100, Mark Kettenis wrote:
 Date: Fri, 11 Dec 2020 11:51:54 -0600
 From: Scott Cheloha 
 
 On Fri, Dec 11, 2020 at 09:49:07AM -0300, Martin Pieuchot wrote:
> 
> I'm not sure to understand, can't we do:
> 
>   pool_wait_free = SEC_TO_NSEC(1);
>   pool_wait_gc = SEC_TO_NSEC(8);
> 
 [...]
 
 We can do that at runtime but not at compile time.  SEC_TO_NSEC(1)
 isn't a constant so that won't compile (I just tried).
 
 We _could_ do something like this:
 
 #define POOL_WAIT_FREE SEC_TO_NSEC(1)
 
 I think the compiler will probably inline the result and elide the
 overflow check because the input is a constant.  I don't know how to
 verify this, but my limited understanding of compilers suggests that
 this is totally possible.
>>> 
>>> Yes.  The consequence of that is that the values are no longer
>>> patchable.  That may not be very important though (I never really use
>>> that possibility).
>> 
>> What do you mean by "patchable"?  I assume you don't mean the source
>> code.
>> 
>> (Also, you did not comment on the struct stuff below so I'm proceeding
>> with the impression there's nothing at issue there.)
> 
> Hearing nothing after two weeks I assume nobody cares if timeouts are
> no longer patchable.

There's been no need to tweak them for years. I'd bet people would reach for 
the compiler before ddb for tuning these anyway.

> Looking for OKs on the attached patch.

OK by me.

> 
> CC tedu@/dlg@, who added these timeouts to pool(9) in the first place:
> 
> https://github.com/openbsd/src/commit/786f6c84e747ccb9777ad35d2b01160679aec089
> 
> Index: sys/pool.h
> ===
> RCS file: /cvs/src/sys/sys/pool.h,v
> retrieving revision 1.77
> diff -u -p -r1.77 pool.h
> --- sys/pool.h19 Jul 2019 09:03:03 -  1.77
> +++ sys/pool.h23 Dec 2020 17:06:19 -
> @@ -201,9 +201,9 @@ struct pool {
>   u_int   pr_cache_items; /* target list length */
>   u_int   pr_cache_contention;
>   u_int   pr_cache_contention_prev;
> - int pr_cache_tick;  /* time idle list was empty */
> - int pr_cache_nout;
> + uint64_tpr_cache_timestamp; /* time idle list was empty */
>   uint64_tpr_cache_ngc;   /* # of times the gc released a list */
> + int pr_cache_nout;
> 
>   u_int   pr_align;
>   u_int   pr_maxcolors;   /* Cache coloring */
> Index: kern/subr_pool.c
> ===
> RCS file: /cvs/src/sys/kern/subr_pool.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 subr_pool.c
> --- kern/subr_pool.c  24 Jan 2020 06:31:17 -  1.230
> +++ kern/subr_pool.c  23 Dec 2020 17:06:20 -
> @@ -41,6 +41,7 @@
> #include 
> #include 
> #include 
> +#include 
> #include 
> #include 
> 
> @@ -148,7 +149,7 @@ struct pool_page_header {
>   caddr_t ph_page;/* this page's address */
>   caddr_t ph_colored; /* page's colored address */
>   unsigned long   ph_magic;
> - int ph_tick;
> + uint64_tph_timestamp;
> };
> #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
> #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
> @@ -266,8 +267,22 @@ void pool_gc_sched(void *);
> struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
> void  pool_gc_pages(void *);
> struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
> -int pool_wait_free = 1;
> -int pool_wait_gc = 8;
> +
> +#define POOL_WAIT_FREE   SEC_TO_NSEC(1)
> +#define POOL_WAIT_GC SEC_TO_NSEC(8)
> +
> +/*
> + * TODO Move getnsecuptime() to kern_tc.c and document it when we
> + * have callers in other modules.
> + */
> +static uint64_t
> +getnsecuptime(void)
> +{
> + struct timespec now;
> +
> + getnanouptime();
> + return TIMESPEC_TO_NSEC();
> +}
> 
> RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
> 
> @@ -797,7 +812,7 @@ pool_put(struct pool *pp, void *v)
>   /* is it time to free a page? */
>   if (pp->pr_nidle > pp->pr_maxpages &&
>   (ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
> - (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
> + getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
>   freeph = ph;
>   pool_p_remove(pp, freeph);
>   }
> @@ -864,7 +879,7 @@ pool_do_put(struct pool *pp, void *v)
>*/
>   pp->pr_nidle++;
> 
> - ph->ph_tick = ticks;
> + ph->ph_timestamp = getnsecuptime();
>   TAILQ_REMOVE(>pr_partpages, ph, ph_entry);
>   

Re: pool(9): remove ticks (attempt 2)

2020-12-23 Thread Scott Cheloha
On Fri, Dec 11, 2020 at 05:32:54PM -0600, Scott Cheloha wrote:
> On Fri, Dec 11, 2020 at 07:52:45PM +0100, Mark Kettenis wrote:
> > > Date: Fri, 11 Dec 2020 11:51:54 -0600
> > > From: Scott Cheloha 
> > > 
> > > On Fri, Dec 11, 2020 at 09:49:07AM -0300, Martin Pieuchot wrote:
> > > > 
> > > > I'm not sure to understand, can't we do:
> > > > 
> > > > pool_wait_free = SEC_TO_NSEC(1);
> > > > pool_wait_gc = SEC_TO_NSEC(8);
> > > > 
> > > [...]
> > > 
> > > We can do that at runtime but not at compile time.  SEC_TO_NSEC(1)
> > > isn't a constant so that won't compile (I just tried).
> > > 
> > > We _could_ do something like this:
> > > 
> > > #define POOL_WAIT_FREESEC_TO_NSEC(1)
> > > 
> > > I think the compiler will probably inline the result and elide the
> > > overflow check because the input is a constant.  I don't know how to
> > > verify this, but my limited understanding of compilers suggests that
> > > this is totally possible.
> > 
> > Yes.  The consequence of that is that the values are no longer
> > patchable.  That may not be very important though (I never really use
> > that possibility).
> 
> What do you mean by "patchable"?  I assume you don't mean the source
> code.
> 
> (Also, you did not comment on the struct stuff below so I'm proceeding
> with the impression there's nothing at issue there.)

Hearing nothing after two weeks I assume nobody cares if timeouts are
no longer patchable.

Looking for OKs on the attached patch.

CC tedu@/dlg@, who added these timeouts to pool(9) in the first place:

https://github.com/openbsd/src/commit/786f6c84e747ccb9777ad35d2b01160679aec089

Index: sys/pool.h
===
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.77
diff -u -p -r1.77 pool.h
--- sys/pool.h  19 Jul 2019 09:03:03 -  1.77
+++ sys/pool.h  23 Dec 2020 17:06:19 -
@@ -201,9 +201,9 @@ struct pool {
u_int   pr_cache_items; /* target list length */
u_int   pr_cache_contention;
u_int   pr_cache_contention_prev;
-   int pr_cache_tick;  /* time idle list was empty */
-   int pr_cache_nout;
+   uint64_tpr_cache_timestamp; /* time idle list was empty */
uint64_tpr_cache_ngc;   /* # of times the gc released a list */
+   int pr_cache_nout;
 
u_int   pr_align;
u_int   pr_maxcolors;   /* Cache coloring */
Index: kern/subr_pool.c
===
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.230
diff -u -p -r1.230 subr_pool.c
--- kern/subr_pool.c24 Jan 2020 06:31:17 -  1.230
+++ kern/subr_pool.c23 Dec 2020 17:06:20 -
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -148,7 +149,7 @@ struct pool_page_header {
caddr_t ph_page;/* this page's address */
caddr_t ph_colored; /* page's colored address */
unsigned long   ph_magic;
-   int ph_tick;
+   uint64_tph_timestamp;
 };
 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
@@ -266,8 +267,22 @@ void   pool_gc_sched(void *);
 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
 void   pool_gc_pages(void *);
 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
-int pool_wait_free = 1;
-int pool_wait_gc = 8;
+
+#define POOL_WAIT_FREE SEC_TO_NSEC(1)
+#define POOL_WAIT_GC   SEC_TO_NSEC(8)
+
+/*
+ * TODO Move getnsecuptime() to kern_tc.c and document it when we
+ * have callers in other modules.
+ */
+static uint64_t
+getnsecuptime(void)
+{
+   struct timespec now;
+
+   getnanouptime();
+   return TIMESPEC_TO_NSEC();
+}
 
 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
 
@@ -797,7 +812,7 @@ pool_put(struct pool *pp, void *v)
/* is it time to free a page? */
if (pp->pr_nidle > pp->pr_maxpages &&
(ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
-   (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
+   getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
freeph = ph;
pool_p_remove(pp, freeph);
}
@@ -864,7 +879,7 @@ pool_do_put(struct pool *pp, void *v)
 */
pp->pr_nidle++;
 
-   ph->ph_tick = ticks;
+   ph->ph_timestamp = getnsecuptime();
TAILQ_REMOVE(>pr_partpages, ph, ph_entry);
TAILQ_INSERT_TAIL(>pr_emptypages, ph, ph_entry);
pool_update_curpage(pp);
@@ -1566,7 +1581,7 @@ pool_gc_pages(void *null)
/* is it time to free a page? */
if (pp->pr_nidle > pp->pr_minpages &&
(ph = 

Re: pool(9): remove ticks (attempt 2)

2020-12-11 Thread Scott Cheloha
On Fri, Dec 11, 2020 at 07:52:45PM +0100, Mark Kettenis wrote:
> > Date: Fri, 11 Dec 2020 11:51:54 -0600
> > From: Scott Cheloha 
> > 
> > On Fri, Dec 11, 2020 at 09:49:07AM -0300, Martin Pieuchot wrote:
> > > 
> > > I'm not sure to understand, can't we do:
> > > 
> > >   pool_wait_free = SEC_TO_NSEC(1);
> > >   pool_wait_gc = SEC_TO_NSEC(8);
> > > 
> > [...]
> > 
> > We can do that at runtime but not at compile time.  SEC_TO_NSEC(1)
> > isn't a constant so that won't compile (I just tried).
> > 
> > We _could_ do something like this:
> > 
> > #define POOL_WAIT_FREE  SEC_TO_NSEC(1)
> > 
> > I think the compiler will probably inline the result and elide the
> > overflow check because the input is a constant.  I don't know how to
> > verify this, but my limited understanding of compilers suggests that
> > this is totally possible.
> 
> Yes.  The consequence of that is that the values are no longer
> patchable.  That may not be very important though (I never really use
> that possibility).

What do you mean by "patchable"?  I assume you don't mean the source
code.

(Also, you did not comment on the struct stuff below so I'm proceeding
with the impression there's nothing at issue there.)

> > > One comment below
> > > 
> > > [...]
> > >
> > > > > Index: sys/pool.h
> > > > > ===
> > > > > RCS file: /cvs/src/sys/sys/pool.h,v
> > > > > retrieving revision 1.77
> > > > > diff -u -p -r1.77 pool.h
> > > > > --- sys/pool.h19 Jul 2019 09:03:03 -  1.77
> > > > > +++ sys/pool.h10 Dec 2020 22:08:33 -
> > > > > @@ -201,7 +201,7 @@ struct pool {
> > > > >   u_int   pr_cache_items; /* target list length */
> > > > >   u_int   pr_cache_contention;
> > > > >   u_int   pr_cache_contention_prev;
> > > > > - int pr_cache_tick;  /* time idle list was empty */
> > > > > + uint64_tpr_cache_timestamp; /* when idle list was 
> > > > > empty */
> > > > >   int pr_cache_nout;
> > > 
> > > Do you see a change in of size in the struct?  If so, does moving
> > > `pr_cache_nout' after `pr_cache_ngc' helps?
> > 
> > Right, the structs.  We're changing both struct pool_page_header and
> > struct pool.
> > 
> > For the unmodified (-current) binary on amd64:
> > 
> > $ gdb -q obj/bsd.gdb
> > (gdb) p sizeof(struct pool_page_header)
> > $1 = 112
> > (gdb) p sizeof(struct pool)
> > $2 = 424
> > 
> > For the patched binary on amd64:
> > 
> > (gdb) p sizeof(struct pool_page_header) 
> > $1 = 112
> > (gdb) p sizeof(struct pool)
> > $2 = 432
> > 
> > kettenis@: The pool_page_header does not grow.  That's good, right?
> > 
> > If do as mpi@ suggested and we move pr_cache_nout after
> > pr_cache_ngc...
> > 
> > $ gdb -q obj/bsd.gdb 
> > (gdb) p sizeof(struct pool_page_header)
> > $1 = 112
> > (gdb) p sizeof(struct pool)
> > $2 = 424
> > 
> > Bingo!  Same size.  mpi@, how did you know?
> > 
> > Patch attached with the struct shuffling and the new macros below.
> > 
> > Index: kern/subr_pool.c
> > ===
> > RCS file: /cvs/src/sys/kern/subr_pool.c,v
> > retrieving revision 1.230
> > diff -u -p -r1.230 subr_pool.c
> > --- kern/subr_pool.c24 Jan 2020 06:31:17 -  1.230
> > +++ kern/subr_pool.c11 Dec 2020 17:50:46 -
> > @@ -41,6 +41,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  
> > @@ -148,7 +149,7 @@ struct pool_page_header {
> > caddr_t ph_page;/* this page's address */
> > caddr_t ph_colored; /* page's colored address */
> > unsigned long   ph_magic;
> > -   int ph_tick;
> > +   uint64_tph_timestamp;
> >  };
> >  #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
> >  #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
> > @@ -266,8 +267,19 @@ void   pool_gc_sched(void *);
> >  struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
> >  void   pool_gc_pages(void *);
> >  struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
> > -int pool_wait_free = 1;
> > -int pool_wait_gc = 8;
> > +
> > +#define POOL_WAIT_FREE SEC_TO_NSEC(1)
> > +#define POOL_WAIT_GC   SEC_TO_NSEC(8)
> > +
> > +/* XXX where do I put this? */
> > +uint64_t
> > +getnsecuptime(void)
> > +{
> > +   struct timespec now;
> > +
> > +   getnanouptime();
> > +   return TIMESPEC_TO_NSEC();
> > +}
> >  
> >  RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
> >  
> > @@ -797,7 +809,7 @@ pool_put(struct pool *pp, void *v)
> > /* is it time to free a page? */
> > if (pp->pr_nidle > pp->pr_maxpages &&
> > (ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
> > -   (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
> > +   getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
> >   

Re: pool(9): remove ticks (attempt 2)

2020-12-11 Thread Mark Kettenis
> Date: Fri, 11 Dec 2020 11:51:54 -0600
> From: Scott Cheloha 
> 
> On Fri, Dec 11, 2020 at 09:49:07AM -0300, Martin Pieuchot wrote:
> > On 11/12/20(Fri) 12:52, Mark Kettenis wrote:
> > > > Date: Thu, 10 Dec 2020 16:13:22 -0600
> > > > From: Scott Cheloha 
> > > > 
> > > > Hi,
> > > > 
> > > > We looked at removing the ticks from subr_pool.c a while back but it
> > > > got shelved.  That may or may not have been my fault.  I don't
> > > > remember.
> > > > 
> > > > Anyway, I would normally suggest switching to getuptime(9) here, but
> > > > getuptime(9) counts in seconds and we're working with a 1 second
> > > > timeout in this code (pool_wait_free) so that's too coarse an
> > > > interface for this job.
> > > > 
> > > > The next best thing I could come up with was introducing a coarse
> > > > sub-second interface for use in this file, "getnsecuptime()", which
> > > > calls getnanouptime(9) and converts the result to a 64-bit count of
> > > > nanoseconds.  This is relatively fast (we don't read the underlying
> > > > timecounter hardware) and causes a minimal amount of code change (we
> > > > can use it inline because it returns an integral value).
> > > > 
> > > > >From there the changes are simple:
> > > > 
> > > > - Renames: ph_tick -> ph_timestamp, pr_cache_tick -> pr_cache_timestamp
> > > > 
> > > > - Call getnsecuptime(9) wherever we read 'ticks'.
> > > > 
> > > > - Change pool_wait_gc and pool_wait_free to counts of nanoseconds.
> > > >   They could be macros, e.g.
> > > > 
> > > > #define POOL_WAIT_GC80ULL
> > > > 
> > > >   but I'll leave that for a second diff to keep things simple.
> > > > 
> > > > This compiles and I haven't changed any logic so I assume it isn't
> > > > broken.
> > > > 
> > > > We could move getnsecuptime() into kern_tc.c but it isn't used
> > > > anywhere else yet so I'm hesitant to do so.
> > > > 
> > > > Thoughts?
> > > 
> > > Specifying the timeouts in nanoseconds isn't particularly useful I'd
> > > say.  But I see we can't use SEC_TO_NSEC here because of the overflow
> > > check...
> > 
> > I'm not sure to understand, can't we do:
> > 
> > pool_wait_free = SEC_TO_NSEC(1);
> > pool_wait_gc = SEC_TO_NSEC(8);
> > 
> > or are you pointing at something else?
> 
> We can do that at runtime but not at compile time.  SEC_TO_NSEC(1)
> isn't a constant so that won't compile (I just tried).
> 
> We _could_ do something like this:
> 
> #define POOL_WAIT_FREESEC_TO_NSEC(1)
> 
> I think the compiler will probably inline the result and elide the
> overflow check because the input is a constant.  I don't know how to
> verify this, but my limited understanding of compilers suggests that
> this is totally possible.

Yes.  The consequence of that is that the values are no longer
patchable.  That may not be very important though (I never really use
that possibility).

> > One comment below
> > 
> > [...]
> >
> > > > Index: sys/pool.h
> > > > ===
> > > > RCS file: /cvs/src/sys/sys/pool.h,v
> > > > retrieving revision 1.77
> > > > diff -u -p -r1.77 pool.h
> > > > --- sys/pool.h  19 Jul 2019 09:03:03 -  1.77
> > > > +++ sys/pool.h  10 Dec 2020 22:08:33 -
> > > > @@ -201,7 +201,7 @@ struct pool {
> > > > u_int   pr_cache_items; /* target list length */
> > > > u_int   pr_cache_contention;
> > > > u_int   pr_cache_contention_prev;
> > > > -   int pr_cache_tick;  /* time idle list was empty */
> > > > +   uint64_tpr_cache_timestamp; /* when idle list was 
> > > > empty */
> > > > int pr_cache_nout;
> > 
> > Do you see a change in of size in the struct?  If so, does moving
> > `pr_cache_nout' after `pr_cache_ngc' helps?
> 
> Right, the structs.  We're changing both struct pool_page_header and
> struct pool.
> 
> For the unmodified (-current) binary on amd64:
> 
> $ gdb -q obj/bsd.gdb
> (gdb) p sizeof(struct pool_page_header)
> $1 = 112
> (gdb) p sizeof(struct pool)
> $2 = 424
> 
> For the patched binary on amd64:
> 
> (gdb) p sizeof(struct pool_page_header) 
> $1 = 112
> (gdb) p sizeof(struct pool)
> $2 = 432
> 
> kettenis@: The pool_page_header does not grow.  That's good, right?
> 
> If do as mpi@ suggested and we move pr_cache_nout after
> pr_cache_ngc...
> 
> $ gdb -q obj/bsd.gdb 
> (gdb) p sizeof(struct pool_page_header)
> $1 = 112
> (gdb) p sizeof(struct pool)
> $2 = 424
> 
> Bingo!  Same size.  mpi@, how did you know?
> 
> Patch attached with the struct shuffling and the new macros below.
> 
> Index: kern/subr_pool.c
> ===
> RCS file: /cvs/src/sys/kern/subr_pool.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 subr_pool.c
> --- kern/subr_pool.c  24 Jan 2020 06:31:17 -  1.230
> +++ kern/subr_pool.c  11 Dec 2020 17:50:46 -
> @@ -41,6 +41,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  

Re: pool(9): remove ticks (attempt 2)

2020-12-11 Thread Scott Cheloha
On Fri, Dec 11, 2020 at 09:49:07AM -0300, Martin Pieuchot wrote:
> On 11/12/20(Fri) 12:52, Mark Kettenis wrote:
> > > Date: Thu, 10 Dec 2020 16:13:22 -0600
> > > From: Scott Cheloha 
> > > 
> > > Hi,
> > > 
> > > We looked at removing the ticks from subr_pool.c a while back but it
> > > got shelved.  That may or may not have been my fault.  I don't
> > > remember.
> > > 
> > > Anyway, I would normally suggest switching to getuptime(9) here, but
> > > getuptime(9) counts in seconds and we're working with a 1 second
> > > timeout in this code (pool_wait_free) so that's too coarse an
> > > interface for this job.
> > > 
> > > The next best thing I could come up with was introducing a coarse
> > > sub-second interface for use in this file, "getnsecuptime()", which
> > > calls getnanouptime(9) and converts the result to a 64-bit count of
> > > nanoseconds.  This is relatively fast (we don't read the underlying
> > > timecounter hardware) and causes a minimal amount of code change (we
> > > can use it inline because it returns an integral value).
> > > 
> > > >From there the changes are simple:
> > > 
> > > - Renames: ph_tick -> ph_timestamp, pr_cache_tick -> pr_cache_timestamp
> > > 
> > > - Call getnsecuptime(9) wherever we read 'ticks'.
> > > 
> > > - Change pool_wait_gc and pool_wait_free to counts of nanoseconds.
> > >   They could be macros, e.g.
> > > 
> > > #define POOL_WAIT_GC  80ULL
> > > 
> > >   but I'll leave that for a second diff to keep things simple.
> > > 
> > > This compiles and I haven't changed any logic so I assume it isn't
> > > broken.
> > > 
> > > We could move getnsecuptime() into kern_tc.c but it isn't used
> > > anywhere else yet so I'm hesitant to do so.
> > > 
> > > Thoughts?
> > 
> > Specifying the timeouts in nanoseconds isn't particularly useful I'd
> > say.  But I see we can't use SEC_TO_NSEC here because of the overflow
> > check...
> 
> I'm not sure to understand, can't we do:
> 
>   pool_wait_free = SEC_TO_NSEC(1);
>   pool_wait_gc = SEC_TO_NSEC(8);
> 
> or are you pointing at something else?

We can do that at runtime but not at compile time.  SEC_TO_NSEC(1)
isn't a constant so that won't compile (I just tried).

We _could_ do something like this:

#define POOL_WAIT_FREE  SEC_TO_NSEC(1)

I think the compiler will probably inline the result and elide the
overflow check because the input is a constant.  I don't know how to
verify this, but my limited understanding of compilers suggests that
this is totally possible.

> One comment below
> 
> [...]
>
> > > Index: sys/pool.h
> > > ===
> > > RCS file: /cvs/src/sys/sys/pool.h,v
> > > retrieving revision 1.77
> > > diff -u -p -r1.77 pool.h
> > > --- sys/pool.h19 Jul 2019 09:03:03 -  1.77
> > > +++ sys/pool.h10 Dec 2020 22:08:33 -
> > > @@ -201,7 +201,7 @@ struct pool {
> > >   u_int   pr_cache_items; /* target list length */
> > >   u_int   pr_cache_contention;
> > >   u_int   pr_cache_contention_prev;
> > > - int pr_cache_tick;  /* time idle list was empty */
> > > + uint64_tpr_cache_timestamp; /* when idle list was empty */
> > >   int pr_cache_nout;
> 
> Do you see a change in of size in the struct?  If so, does moving
> `pr_cache_nout' after `pr_cache_ngc' helps?

Right, the structs.  We're changing both struct pool_page_header and
struct pool.

For the unmodified (-current) binary on amd64:

$ gdb -q obj/bsd.gdb
(gdb) p sizeof(struct pool_page_header)
$1 = 112
(gdb) p sizeof(struct pool)
$2 = 424

For the patched binary on amd64:

(gdb) p sizeof(struct pool_page_header) 
$1 = 112
(gdb) p sizeof(struct pool)
$2 = 432

kettenis@: The pool_page_header does not grow.  That's good, right?

If do as mpi@ suggested and we move pr_cache_nout after
pr_cache_ngc...

$ gdb -q obj/bsd.gdb 
(gdb) p sizeof(struct pool_page_header)
$1 = 112
(gdb) p sizeof(struct pool)
$2 = 424

Bingo!  Same size.  mpi@, how did you know?

Patch attached with the struct shuffling and the new macros below.

Index: kern/subr_pool.c
===
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.230
diff -u -p -r1.230 subr_pool.c
--- kern/subr_pool.c24 Jan 2020 06:31:17 -  1.230
+++ kern/subr_pool.c11 Dec 2020 17:50:46 -
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -148,7 +149,7 @@ struct pool_page_header {
caddr_t ph_page;/* this page's address */
caddr_t ph_colored; /* page's colored address */
unsigned long   ph_magic;
-   int ph_tick;
+   uint64_tph_timestamp;
 };
 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
@@ -266,8 +267,19 @@ void   pool_gc_sched(void 

Re: pool(9): remove ticks (attempt 2)

2020-12-11 Thread Martin Pieuchot
On 11/12/20(Fri) 12:52, Mark Kettenis wrote:
> > Date: Thu, 10 Dec 2020 16:13:22 -0600
> > From: Scott Cheloha 
> > 
> > Hi,
> > 
> > We looked at removing the ticks from subr_pool.c a while back but it
> > got shelved.  That may or may not have been my fault.  I don't
> > remember.
> > 
> > Anyway, I would normally suggest switching to getuptime(9) here, but
> > getuptime(9) counts in seconds and we're working with a 1 second
> > timeout in this code (pool_wait_free) so that's too coarse an
> > interface for this job.
> > 
> > The next best thing I could come up with was introducing a coarse
> > sub-second interface for use in this file, "getnsecuptime()", which
> > calls getnanouptime(9) and converts the result to a 64-bit count of
> > nanoseconds.  This is relatively fast (we don't read the underlying
> > timecounter hardware) and causes a minimal amount of code change (we
> > can use it inline because it returns an integral value).
> > 
> > >From there the changes are simple:
> > 
> > - Renames: ph_tick -> ph_timestamp, pr_cache_tick -> pr_cache_timestamp
> > 
> > - Call getnsecuptime(9) wherever we read 'ticks'.
> > 
> > - Change pool_wait_gc and pool_wait_free to counts of nanoseconds.
> >   They could be macros, e.g.
> > 
> > #define POOL_WAIT_GC80ULL
> > 
> >   but I'll leave that for a second diff to keep things simple.
> > 
> > This compiles and I haven't changed any logic so I assume it isn't
> > broken.
> > 
> > We could move getnsecuptime() into kern_tc.c but it isn't used
> > anywhere else yet so I'm hesitant to do so.
> > 
> > Thoughts?
> 
> Specifying the timeouts in nanoseconds isn't particularly useful I'd
> say.  But I see we can't use SEC_TO_NSEC here because of the overflow
> check...

I'm not sure to understand, can't we do:

pool_wait_free = SEC_TO_NSEC(1);
pool_wait_gc = SEC_TO_NSEC(8);

or are you pointing at something else?

One comment below

> > Index: kern/subr_pool.c
> > ===
> > RCS file: /cvs/src/sys/kern/subr_pool.c,v
> > retrieving revision 1.230
> > diff -u -p -r1.230 subr_pool.c
> > --- kern/subr_pool.c24 Jan 2020 06:31:17 -  1.230
> > +++ kern/subr_pool.c10 Dec 2020 22:08:33 -
> > @@ -41,6 +41,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  
> > @@ -148,7 +149,7 @@ struct pool_page_header {
> > caddr_t ph_page;/* this page's address */
> > caddr_t ph_colored; /* page's colored address */
> > unsigned long   ph_magic;
> > -   int ph_tick;
> > +   uint64_tph_timestamp;   /* uptime when last modified */
> >  };
> >  #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
> >  #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
> > @@ -266,8 +267,18 @@ void   pool_gc_sched(void *);
> >  struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
> >  void   pool_gc_pages(void *);
> >  struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
> > -int pool_wait_free = 1;
> > -int pool_wait_gc = 8;
> > +uint64_t pool_wait_free = 10ULL;   /* nanoseconds */
> > +uint64_t pool_wait_gc = 80ULL; /* nanoseconds */
> > +
> > +/* XXX where do I put this? */
> > +uint64_t
> > +getnsecuptime(void)
> > +{
> > +   struct timespec now;
> > +
> > +   getnanouptime();
> > +   return TIMESPEC_TO_NSEC();
> > +}
> >  
> >  RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
> >  
> > @@ -797,7 +808,7 @@ pool_put(struct pool *pp, void *v)
> > /* is it time to free a page? */
> > if (pp->pr_nidle > pp->pr_maxpages &&
> > (ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
> > -   (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
> > +   getnsecuptime() - ph->ph_timestamp > pool_wait_free) {
> > freeph = ph;
> > pool_p_remove(pp, freeph);
> > }
> > @@ -864,7 +875,7 @@ pool_do_put(struct pool *pp, void *v)
> >  */
> > pp->pr_nidle++;
> >  
> > -   ph->ph_tick = ticks;
> > +   ph->ph_timestamp = getnsecuptime();
> > TAILQ_REMOVE(>pr_partpages, ph, ph_entry);
> > TAILQ_INSERT_TAIL(>pr_emptypages, ph, ph_entry);
> > pool_update_curpage(pp);
> > @@ -1566,7 +1577,7 @@ pool_gc_pages(void *null)
> > /* is it time to free a page? */
> > if (pp->pr_nidle > pp->pr_minpages &&
> > (ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
> > -   (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
> > +   getnsecuptime() - ph->ph_timestamp > pool_wait_gc) {
> > freeph = ph;
> > pool_p_remove(pp, freeph);
> > } else
> > @@ -1726,7 +1737,7 @@ pool_cache_init(struct pool *pp)
> > arc4random_buf(pp->pr_cache_magic, 

Re: pool(9): remove ticks (attempt 2)

2020-12-11 Thread Mark Kettenis
> Date: Thu, 10 Dec 2020 16:13:22 -0600
> From: Scott Cheloha 
> 
> Hi,
> 
> We looked at removing the ticks from subr_pool.c a while back but it
> got shelved.  That may or may not have been my fault.  I don't
> remember.
> 
> Anyway, I would normally suggest switching to getuptime(9) here, but
> getuptime(9) counts in seconds and we're working with a 1 second
> timeout in this code (pool_wait_free) so that's too coarse an
> interface for this job.
> 
> The next best thing I could come up with was introducing a coarse
> sub-second interface for use in this file, "getnsecuptime()", which
> calls getnanouptime(9) and converts the result to a 64-bit count of
> nanoseconds.  This is relatively fast (we don't read the underlying
> timecounter hardware) and causes a minimal amount of code change (we
> can use it inline because it returns an integral value).
> 
> >From there the changes are simple:
> 
> - Renames: ph_tick -> ph_timestamp, pr_cache_tick -> pr_cache_timestamp
> 
> - Call getnsecuptime(9) wherever we read 'ticks'.
> 
> - Change pool_wait_gc and pool_wait_free to counts of nanoseconds.
>   They could be macros, e.g.
> 
> #define POOL_WAIT_GC  80ULL
> 
>   but I'll leave that for a second diff to keep things simple.
> 
> This compiles and I haven't changed any logic so I assume it isn't
> broken.
> 
> We could move getnsecuptime() into kern_tc.c but it isn't used
> anywhere else yet so I'm hesitant to do so.
> 
> Thoughts?

Specifying the timeouts in nanoseconds isn't particularly useful I'd
say.  But I see we can't use SEC_TO_NSEC here because of the overflow
check...

Growing the pool page header is a bit unfortunate, but uint64_t is
better than a full struct timespec I suppose.  Using getxxxuptime() is
the right call here.


> Index: kern/subr_pool.c
> ===
> RCS file: /cvs/src/sys/kern/subr_pool.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 subr_pool.c
> --- kern/subr_pool.c  24 Jan 2020 06:31:17 -  1.230
> +++ kern/subr_pool.c  10 Dec 2020 22:08:33 -
> @@ -41,6 +41,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -148,7 +149,7 @@ struct pool_page_header {
>   caddr_t ph_page;/* this page's address */
>   caddr_t ph_colored; /* page's colored address */
>   unsigned long   ph_magic;
> - int ph_tick;
> + uint64_tph_timestamp;   /* uptime when last modified */
>  };
>  #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
>  #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
> @@ -266,8 +267,18 @@ void pool_gc_sched(void *);
>  struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
>  void pool_gc_pages(void *);
>  struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
> -int pool_wait_free = 1;
> -int pool_wait_gc = 8;
> +uint64_t pool_wait_free = 10ULL; /* nanoseconds */
> +uint64_t pool_wait_gc = 80ULL;   /* nanoseconds */
> +
> +/* XXX where do I put this? */
> +uint64_t
> +getnsecuptime(void)
> +{
> + struct timespec now;
> +
> + getnanouptime();
> + return TIMESPEC_TO_NSEC();
> +}
>  
>  RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
>  
> @@ -797,7 +808,7 @@ pool_put(struct pool *pp, void *v)
>   /* is it time to free a page? */
>   if (pp->pr_nidle > pp->pr_maxpages &&
>   (ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
> - (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
> + getnsecuptime() - ph->ph_timestamp > pool_wait_free) {
>   freeph = ph;
>   pool_p_remove(pp, freeph);
>   }
> @@ -864,7 +875,7 @@ pool_do_put(struct pool *pp, void *v)
>*/
>   pp->pr_nidle++;
>  
> - ph->ph_tick = ticks;
> + ph->ph_timestamp = getnsecuptime();
>   TAILQ_REMOVE(>pr_partpages, ph, ph_entry);
>   TAILQ_INSERT_TAIL(>pr_emptypages, ph, ph_entry);
>   pool_update_curpage(pp);
> @@ -1566,7 +1577,7 @@ pool_gc_pages(void *null)
>   /* is it time to free a page? */
>   if (pp->pr_nidle > pp->pr_minpages &&
>   (ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
> - (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
> + getnsecuptime() - ph->ph_timestamp > pool_wait_gc) {
>   freeph = ph;
>   pool_p_remove(pp, freeph);
>   } else
> @@ -1726,7 +1737,7 @@ pool_cache_init(struct pool *pp)
>   arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
>   TAILQ_INIT(>pr_cache_lists);
>   pp->pr_cache_nitems = 0;
> - pp->pr_cache_tick = ticks;
> + pp->pr_cache_timestamp = getnsecuptime();
>   pp->pr_cache_items = 8;
>   pp->pr_cache_contention = 0;
>   

pool(9): remove ticks (attempt 2)

2020-12-10 Thread Scott Cheloha
Hi,

We looked at removing the ticks from subr_pool.c a while back but it
got shelved.  That may or may not have been my fault.  I don't
remember.

Anyway, I would normally suggest switching to getuptime(9) here, but
getuptime(9) counts in seconds and we're working with a 1 second
timeout in this code (pool_wait_free) so that's too coarse an
interface for this job.

The next best thing I could come up with was introducing a coarse
sub-second interface for use in this file, "getnsecuptime()", which
calls getnanouptime(9) and converts the result to a 64-bit count of
nanoseconds.  This is relatively fast (we don't read the underlying
timecounter hardware) and causes a minimal amount of code change (we
can use it inline because it returns an integral value).

>From there the changes are simple:

- Renames: ph_tick -> ph_timestamp, pr_cache_tick -> pr_cache_timestamp

- Call getnsecuptime(9) wherever we read 'ticks'.

- Change pool_wait_gc and pool_wait_free to counts of nanoseconds.
  They could be macros, e.g.

#define POOL_WAIT_GC80ULL

  but I'll leave that for a second diff to keep things simple.

This compiles and I haven't changed any logic so I assume it isn't
broken.

We could move getnsecuptime() into kern_tc.c but it isn't used
anywhere else yet so I'm hesitant to do so.

Thoughts?

Index: kern/subr_pool.c
===
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.230
diff -u -p -r1.230 subr_pool.c
--- kern/subr_pool.c24 Jan 2020 06:31:17 -  1.230
+++ kern/subr_pool.c10 Dec 2020 22:08:33 -
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -148,7 +149,7 @@ struct pool_page_header {
caddr_t ph_page;/* this page's address */
caddr_t ph_colored; /* page's colored address */
unsigned long   ph_magic;
-   int ph_tick;
+   uint64_tph_timestamp;   /* uptime when last modified */
 };
 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
@@ -266,8 +267,18 @@ void   pool_gc_sched(void *);
 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
 void   pool_gc_pages(void *);
 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
-int pool_wait_free = 1;
-int pool_wait_gc = 8;
+uint64_t pool_wait_free = 10ULL;   /* nanoseconds */
+uint64_t pool_wait_gc = 80ULL; /* nanoseconds */
+
+/* XXX where do I put this? */
+uint64_t
+getnsecuptime(void)
+{
+   struct timespec now;
+
+   getnanouptime();
+   return TIMESPEC_TO_NSEC();
+}
 
 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
 
@@ -797,7 +808,7 @@ pool_put(struct pool *pp, void *v)
/* is it time to free a page? */
if (pp->pr_nidle > pp->pr_maxpages &&
(ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
-   (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
+   getnsecuptime() - ph->ph_timestamp > pool_wait_free) {
freeph = ph;
pool_p_remove(pp, freeph);
}
@@ -864,7 +875,7 @@ pool_do_put(struct pool *pp, void *v)
 */
pp->pr_nidle++;
 
-   ph->ph_tick = ticks;
+   ph->ph_timestamp = getnsecuptime();
TAILQ_REMOVE(>pr_partpages, ph, ph_entry);
TAILQ_INSERT_TAIL(>pr_emptypages, ph, ph_entry);
pool_update_curpage(pp);
@@ -1566,7 +1577,7 @@ pool_gc_pages(void *null)
/* is it time to free a page? */
if (pp->pr_nidle > pp->pr_minpages &&
(ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
-   (ticks - ph->ph_tick) > (hz * pool_wait_gc)) {
+   getnsecuptime() - ph->ph_timestamp > pool_wait_gc) {
freeph = ph;
pool_p_remove(pp, freeph);
} else
@@ -1726,7 +1737,7 @@ pool_cache_init(struct pool *pp)
arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
TAILQ_INIT(>pr_cache_lists);
pp->pr_cache_nitems = 0;
-   pp->pr_cache_tick = ticks;
+   pp->pr_cache_timestamp = getnsecuptime();
pp->pr_cache_items = 8;
pp->pr_cache_contention = 0;
pp->pr_cache_ngc = 0;
@@ -1829,7 +1840,7 @@ pool_cache_list_free(struct pool *pp, st
 {
pool_list_enter(pp);
if (TAILQ_EMPTY(>pr_cache_lists))
-   pp->pr_cache_tick = ticks;
+   pp->pr_cache_timestamp = getnsecuptime();
 
pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
TAILQ_INSERT_TAIL(>pr_cache_lists, ci, ci_nextl);
@@ -2006,7 +2017,7 @@ pool_cache_gc(struct pool *pp)
 {
unsigned int contention, delta;
 
-   if ((ticks - pp->pr_cache_tick) > (hz * pool_wait_gc) &&
+