i've posted this before as part of a much bigger diff, but smaller
is better.

it lets subsystems ask for per-cpu item caches to be enabled on
their pools. the most obvious use case for this is the mbuf pools.
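
to turn it on, a pool makes one extra call after pool_init(). eg,
for the mbuf pool it would look something like this (just a sketch;
the pool_init arguments here are illustrative, not copied from
uipc_mbuf.c):

	pool_init(&mbpool, MSIZE, 0, IPL_NET, 0, "mbufpl", NULL);
	pool_cache_init(&mbpool);	/* enable the per-cpu item caches */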

the caches are modelled on what's described in the "Magazines and
Vmem: Extending the Slab Allocator to Many CPUs and Arbitrary
Resources" paper by Jeff Bonwick and Jonathan Adams. pools are
modelled on slabs, which Bonwick described in an earlier paper.

the main thing the paper informed was how many objects to cache on
each cpu, and how often to move sets of objects between the cpu
caches and a global list of free objects. unlike the paper we don't
care about keeping constructed objects on the free lists, so we
reuse the memory of the free objects themselves to build the free
lists.
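
in other words, while an item sits in a per-cpu cache the start of
the item's own memory holds the linkage. roughly like this (a
simplified sketch reusing the names from the diff below, not the
actual code):

/*
 * a free item doubles as its own list header, which is why the
 * pool's items must be at least sizeof(struct pool_list) big and
 * why no constructed state survives a put/get cycle.
 */
void
example_put(struct pool_cache *pc, void *v)
{
	struct pool_list *pl = v;	/* overlay the header on the item */

	pl->pl_next = pc->pc_actv;	/* link onto this cpu's list */
	pl->pl_nitems = ++pc->pc_nactv;
	pc->pc_actv = pl;
}

void *
example_get(struct pool_cache *pc)
{
	struct pool_list *pl = pc->pc_actv;

	if (pl == NULL)
		return (NULL);		/* caller falls back to the pool */

	pc->pc_actv = pl->pl_next;
	pc->pc_nactv = pl->pl_nitems - 1;
	return (pl);			/* the item itself is handed out */
}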

i'd like to get this in so we can tinker with it in tree. the things
i think we need to tinker with are what poisoning we can get away
with on the per-cpu caches, and what limits we can enforce at the
pool level.

i think poisoning will be relatively simple to add. the limits are
more challenging because we don't want the pools to have to
coordinate between cpus on every get or put operation. my thought
there is to limit the number of pages a pool can allocate from its
backend rather than limit the items the pool can provide.

limiting the pages could also be done at a lower level. eg, the
mbuf cluster pools could share a common backend that limits the
pages the pools can get, rather than have each cluster pool account
for pages separately.
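
as a very rough sketch of that idea (entirely hypothetical:
mcl_pages_max is made up, and backend_page_alloc()/backend_page_free()
stand in for whatever the cluster pools use to get pages today):

unsigned int mcl_pages;			/* pages handed out so far */
unsigned int mcl_pages_max = 1024;	/* made-up shared limit */

void *
mcl_pool_alloc(struct pool *pp, int flags, int *slowdown)
{
	/* account for the page before asking the real backend */
	if (atomic_inc_int_nv(&mcl_pages) > mcl_pages_max) {
		atomic_dec_int(&mcl_pages);
		return (NULL);
	}

	return (backend_page_alloc(pp, flags, slowdown));
}

void
mcl_pool_free(struct pool *pp, void *v)
{
	backend_page_free(pp, v);
	atomic_dec_int(&mcl_pages);
}

/* every cluster pool would be pool_init()ed with this allocator */
struct pool_allocator mcl_pool_allocator = {
	mcl_pool_alloc, mcl_pool_free
};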

anyway, i'd like to get this in so we can work on this stuff.

ok?

Index: kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.198
diff -u -p -r1.198 subr_pool.c
--- kern/subr_pool.c 15 Sep 2016 02:00:16 -0000 1.198
+++ kern/subr_pool.c 24 Oct 2016 05:09:35 -0000
@@ -42,6 +42,7 @@
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/timeout.h>
+#include <sys/percpu.h>
#include <uvm/uvm_extern.h>
@@ -96,6 +97,33 @@ struct pool_item {
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
+#ifdef MULTIPROCESSOR
+struct pool_list {
+ struct pool_list *pl_next; /* next in list */
+ unsigned long pl_cookie;
+ struct pool_list *pl_nextl; /* next list */
+ unsigned long pl_nitems; /* items in list */
+};
+
+struct pool_cache {
+ struct pool_list *pc_actv;
+ unsigned long pc_nactv; /* cache pc_actv nitems */
+ struct pool_list *pc_prev;
+
+ uint64_t pc_gen; /* generation number */
+ uint64_t pc_gets;
+ uint64_t pc_puts;
+ uint64_t pc_fails;
+
+ int pc_nout;
+};
+
+void *pool_cache_get(struct pool *);
+void pool_cache_put(struct pool *, void *);
+void pool_cache_destroy(struct pool *);
+#endif
+void pool_cache_info(struct pool *, struct kinfo_pool *);
+
#ifdef POOL_DEBUG
int pool_debug = 1;
#else
@@ -355,6 +383,11 @@ pool_destroy(struct pool *pp)
struct pool_item_header *ph;
struct pool *prev, *iter;
+#ifdef MULTIPROCESSOR
+ if (pp->pr_cache != NULL)
+ pool_cache_destroy(pp);
+#endif
+
#ifdef DIAGNOSTIC
if (pp->pr_nout != 0)
panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
@@ -421,6 +454,14 @@ pool_get(struct pool *pp, int flags)
void *v = NULL;
int slowdown = 0;
+#ifdef MULTIPROCESSOR
+ if (pp->pr_cache != NULL) {
+ v = pool_cache_get(pp);
+ if (v != NULL)
+ goto good;
+ }
+#endif
+
KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
mtx_enter(&pp->pr_mtx);
@@ -453,6 +494,9 @@ pool_get(struct pool *pp, int flags)
v = mem.v;
}
+#ifdef MULTIPROCESSOR
+good:
+#endif
if (ISSET(flags, PR_ZERO))
memset(v, 0, pp->pr_size);
@@ -631,6 +675,13 @@ pool_put(struct pool *pp, void *v)
panic("%s: NULL item", __func__);
#endif
+#ifdef MULTIPROCESSOR
+ if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
+ pool_cache_put(pp, v);
+ return;
+ }
+#endif
+
mtx_enter(&pp->pr_mtx);
splassert(pp->pr_ipl);
@@ -1333,6 +1384,8 @@ sysctl_dopool(int *name, u_int namelen,
pi.pr_nidle = pp->pr_nidle;
mtx_leave(&pp->pr_mtx);
+ pool_cache_info(pp, &pi);
+
rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
break;
}
@@ -1499,3 +1552,259 @@ pool_multi_free_ni(struct pool *pp, void
km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
KERNEL_UNLOCK();
}
+
+#ifdef MULTIPROCESSOR
+
+struct pool pool_caches; /* per cpu cache entries */
+
+void
+pool_cache_init(struct pool *pp)
+{
+ struct cpumem *cm;
+ struct pool_cache *pc;
+ struct cpumem_iter i;
+
+ if (pool_caches.pr_size == 0) {
+ pool_init(&pool_caches, sizeof(struct pool_cache), 64,
+ IPL_NONE, PR_WAITOK, "plcache", NULL);
+ }
+
+ KASSERT(pp->pr_size >= sizeof(struct pool_list));
+
+ cm = cpumem_get(&pool_caches);
+
+ mtx_init(&pp->pr_cache_mtx, pp->pr_ipl);
+ pp->pr_cache_list = NULL;
+ pp->pr_cache_nlist = 0;
+ pp->pr_cache_items = 8;
+ pp->pr_cache_contention = 0;
+
+ CPUMEM_FOREACH(pc, &i, cm) {
+ pc->pc_actv = NULL;
+ pc->pc_nactv = 0;
+ pc->pc_prev = NULL;
+
+ pc->pc_gets = 0;
+ pc->pc_puts = 0;
+ pc->pc_fails = 0;
+ pc->pc_nout = 0;
+ }
+
+ pp->pr_cache = cm;
+}
+
+static inline void
+pool_list_enter(struct pool *pp)
+{
+ if (mtx_enter_try(&pp->pr_cache_mtx) == 0) {
+ mtx_enter(&pp->pr_cache_mtx);
+ pp->pr_cache_contention++;
+ }
+}
+
+static inline void
+pool_list_leave(struct pool *pp)
+{
+ mtx_leave(&pp->pr_cache_mtx);
+}
+
+static inline struct pool_list *
+pool_list_alloc(struct pool *pp, struct pool_cache *pc)
+{
+ struct pool_list *pl;
+
+ pool_list_enter(pp);
+ pl = pp->pr_cache_list;
+ if (pl != NULL) {
+ pp->pr_cache_list = pl->pl_nextl;
+ pp->pr_cache_nlist--;
+ }
+
+ pp->pr_cache_nout += pc->pc_nout;
+ pc->pc_nout = 0;
+ pool_list_leave(pp);
+
+ return (pl);
+}
+
+static inline void
+pool_list_free(struct pool *pp, struct pool_cache *pc, struct pool_list *pl)
+{
+ pool_list_enter(pp);
+ pl->pl_nextl = pp->pr_cache_list;
+ pp->pr_cache_list = pl;
+ pp->pr_cache_nlist++;
+
+ pp->pr_cache_nout += pc->pc_nout;
+ pc->pc_nout = 0;
+ pool_list_leave(pp);
+}
+
+static inline struct pool_cache *
+pool_cache_enter(struct pool *pp, int *s)
+{
+ struct pool_cache *pc;
+
+ pc = cpumem_enter(pp->pr_cache);
+ *s = splraise(pp->pr_ipl);
+ pc->pc_gen++;
+
+ return (pc);
+}
+
+static inline void
+pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
+{
+ pc->pc_gen++;
+ splx(s);
+ cpumem_leave(pp->pr_cache, pc);
+}
+
+void *
+pool_cache_get(struct pool *pp)
+{
+ struct pool_cache *pc;
+ struct pool_list *pl;
+ int s;
+
+ pc = pool_cache_enter(pp, &s);
+
+ if (pc->pc_actv != NULL) {
+ pl = pc->pc_actv;
+ } else if (pc->pc_prev != NULL) {
+ pl = pc->pc_prev;
+ pc->pc_prev = NULL;
+ } else if ((pl = pool_list_alloc(pp, pc)) == NULL) {
+ pc->pc_fails++;
+ goto done;
+ }
+
+ pc->pc_actv = pl->pl_next;
+ pc->pc_nactv = pl->pl_nitems - 1;
+ pc->pc_gets++;
+ pc->pc_nout++;
+done:
+ pool_cache_leave(pp, pc, s);
+
+ return (pl);
+}
+
+void
+pool_cache_put(struct pool *pp, void *v)
+{
+ struct pool_cache *pc;
+ struct pool_list *pl = v;
+ unsigned long cache_items = pp->pr_cache_items;
+ unsigned long nitems;
+ int s;
+
+ pc = pool_cache_enter(pp, &s);
+
+ nitems = pc->pc_nactv;
+ if (nitems >= cache_items) {
+ if (pc->pc_prev != NULL)
+ pool_list_free(pp, pc, pc->pc_prev);
+
+ pc->pc_prev = pc->pc_actv;
+
+ pc->pc_actv = NULL;
+ pc->pc_nactv = 0;
+ nitems = 0;
+ }
+
+ pl->pl_next = pc->pc_actv;
+ pl->pl_nitems = ++nitems;
+
+ pc->pc_actv = pl;
+ pc->pc_nactv = nitems;
+
+ pc->pc_puts++;
+ pc->pc_nout--;
+
+ pool_cache_leave(pp, pc, s);
+}
+
+struct pool_list *
+pool_list_put(struct pool *pp, struct pool_list *pl)
+{
+ struct pool_list *rpl, *npl;
+
+ if (pl == NULL)
+ return (NULL);
+
+ rpl = pl->pl_nextl;
+
+ do {
+ npl = pl->pl_next;
+ pool_put(pp, pl);
+ pl = npl;
+ } while (pl != NULL);
+
+ return (rpl);
+}
+
+void
+pool_cache_destroy(struct pool *pp)
+{
+ struct pool_cache *pc;
+ struct pool_list *pl;
+ struct cpumem_iter i;
+ struct cpumem *cm;
+
+ cm = pp->pr_cache;
+ pp->pr_cache = NULL; /* make pool_put avoid the cache */
+
+ CPUMEM_FOREACH(pc, &i, cm) {
+ pool_list_put(pp, pc->pc_actv);
+ pool_list_put(pp, pc->pc_prev);
+ }
+
+ cpumem_put(&pool_caches, cm);
+
+ pl = pp->pr_cache_list;
+ while (pl != NULL)
+ pl = pool_list_put(pp, pl);
+}
+
+void
+pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
+{
+ struct pool_cache *pc;
+ struct cpumem_iter i;
+
+ if (pp->pr_cache == NULL)
+ return;
+
+ mtx_enter(&pp->pr_cache_mtx);
+ CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
+ uint64_t gen, nget, nput;
+
+ do {
+ while ((gen = pc->pc_gen) & 1)
+ yield();
+
+ nget = pc->pc_gets;
+ nput = pc->pc_puts;
+ } while (gen != pc->pc_gen);
+
+ pi->pr_nout += pc->pc_nout;
+ pi->pr_nget += nget;
+ pi->pr_nput += nput;
+ }
+
+ pi->pr_nout += pp->pr_cache_nout;
+ mtx_leave(&pp->pr_cache_mtx);
+}
+#else /* MULTIPROCESSOR */
+void
+pool_cache_init(struct pool *pp)
+{
+ /* nop */
+}
+
+void
+pool_cache_info(struct pool *pp, struct kinfo_pool *pi)
+{
+ /* nop */
+}
+#endif /* MULTIPROCESSOR */
Index: sys/pool.h
===================================================================
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.63
diff -u -p -r1.63 pool.h
--- sys/pool.h 15 Sep 2016 02:00:16 -0000 1.63
+++ sys/pool.h 24 Oct 2016 05:09:36 -0000
@@ -84,6 +84,9 @@ struct pool_allocator {
TAILQ_HEAD(pool_pagelist, pool_item_header);
+struct pool_list;
+struct cpumem;
+
struct pool {
struct mutex pr_mtx;
SIMPLEQ_ENTRY(pool)
@@ -124,9 +127,18 @@ struct pool {
RBT_HEAD(phtree, pool_item_header)
pr_phtree;
- u_int pr_align;
- u_int pr_maxcolors; /* Cache coloring */
- int pr_phoffset; /* Offset in page of page header */
+ struct cpumem * pr_cache;
+ struct mutex pr_cache_mtx;
+ struct pool_list *
+ pr_cache_list;
+ u_int pr_cache_nlist;
+ u_int pr_cache_items;
+ u_int pr_cache_contention;
+ int pr_cache_nout;
+
+ u_int pr_align;
+ u_int pr_maxcolors; /* Cache coloring */
+ int pr_phoffset; /* Offset in page of page header */
/*
* Warning message to be issued, and a per-time-delta rate cap,
@@ -175,6 +187,7 @@ struct pool_request {
void pool_init(struct pool *, size_t, u_int, int, int,
const char *, struct pool_allocator *);
+void pool_cache_init(struct pool *);
void pool_destroy(struct pool *);
void pool_setlowat(struct pool *, int);
void pool_sethiwat(struct pool *, int);