It seems netstat -m is not printing the correct results with this
diff. The max values are wrong.
# sysctl kern.maxclusters
kern.maxclusters=262144
# netstat -m
9543 mbufs in use:
8044 mbufs allocated to data
1491 mbufs allocated to packet headers
8 mbufs allocated to socket names and addresses
0/72/64 mbuf 2048 byte clusters in use (current/peak/max)
8006/33735/120 mbuf 2112 byte clusters in use (current/peak/max)
0/48/64 mbuf 4096 byte clusters in use (current/peak/max)
0/64/64 mbuf 8192 byte clusters in use (current/peak/max)
0/56/112 mbuf 9216 byte clusters in use (current/peak/max)
0/60/80 mbuf 12288 byte clusters in use (current/peak/max)
0/128/64 mbuf 16384 byte clusters in use (current/peak/max)
0/72/64 mbuf 65536 byte clusters in use (current/peak/max)
93680 Kbytes allocated to network (20% in use)
On -current without the diff:
# netstat -m
42 mbufs in use:
35 mbufs allocated to data
2 mbufs allocated to packet headers
5 mbufs allocated to socket names and addresses
0/8/262144 mbuf 2048 byte clusters in use (current/peak/max)
33/45/261900 mbuf 2112 byte clusters in use (current/peak/max)
0/8/131072 mbuf 4096 byte clusters in use (current/peak/max)
0/8/65536 mbuf 8192 byte clusters in use (current/peak/max)
0/14/58254 mbuf 9216 byte clusters in use (current/peak/max)
0/10/43690 mbuf 12288 byte clusters in use (current/peak/max)
0/8/32768 mbuf 16384 byte clusters in use (current/peak/max)
0/8/8192 mbuf 65536 byte clusters in use (current/peak/max)
1120 Kbytes allocated to network (7% in use)
0 requests for memory denied
0 requests for memory delayed
0 calls to protocol drain routines
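
For reference, a minimal sketch of the single global cap the diff
computes in nmbclust_update(), plugging in the kern.maxclusters value
above and assuming the usual 2k MCLBYTES:

/* sketch: global mbuf memory cap as computed by nmbclust_update() in the diff */
#include <stdio.h>

#define MCLBYTES	2048			/* assumed 2k mbuf cluster size */

int
main(void)
{
	unsigned long nmbclust = 262144;	/* kern.maxclusters from above */
	unsigned long mbuf_mem_limit = nmbclust * MCLBYTES;

	/* 262144 * 2048 = 536870912 bytes, ie 512MB shared by all mbuf pools */
	printf("mbuf_mem_limit = %lu bytes (%lu MB)\n",
	    mbuf_mem_limit, mbuf_mem_limit >> 20);

	return (0);
}

The old max column matches the per-pool hard limit the removed
nmbclust_update() loop used to set, ie nmbclust * MCLBYTES / mclsizes[i]
(eg 536870912 / 9216 = 58254 for the 9216 byte pool), so presumably the
column stops making sense once pool_sethardlimit() is no longer called.
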
2016-11-22 3:42 GMT+01:00, David Gwynne <[email protected]>:
> right now pools that make up mbufs are each limited individually.
>
> the following diff instead has the mbuf layer have a global limit
> on the amount of memory that can be allocated to the pools. this
> is enforced by wrapping the multi-page pool allocator with something
> that checks the mbuf memory limit first.
>
> this means all mbufs together will use at most 2k * nmbclust bytes,
> instead of each pool being able to use that much on its own.
>
> ok?
>
> Index: sys/pool.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/pool.h,v
> retrieving revision 1.68
> diff -u -p -r1.68 pool.h
> --- sys/pool.h 21 Nov 2016 01:44:06 -0000 1.68
> +++ sys/pool.h 22 Nov 2016 02:31:47 -0000
> @@ -205,6 +205,7 @@ struct pool {
> #ifdef _KERNEL
>
> extern struct pool_allocator pool_allocator_single;
> +extern struct pool_allocator pool_allocator_multi;
>
> struct pool_request {
> TAILQ_ENTRY(pool_request) pr_entry;
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.222
> diff -u -p -r1.222 mbuf.h
> --- sys/mbuf.h 24 Oct 2016 04:38:44 -0000 1.222
> +++ sys/mbuf.h 22 Nov 2016 02:31:47 -0000
> @@ -416,6 +416,7 @@ struct mbuf_queue {
> };
>
> #ifdef _KERNEL
> +struct pool;
>
> extern int nmbclust; /* limit on the # of clusters */
> extern int mblowat; /* mbuf low water mark */
> @@ -444,6 +445,7 @@ int m_leadingspace(struct mbuf *);
> int m_trailingspace(struct mbuf *);
> struct mbuf *m_clget(struct mbuf *, int, u_int);
> void m_extref(struct mbuf *, struct mbuf *);
> +void m_pool_init(struct pool *, u_int, u_int, const char *);
> void m_extfree_pool(caddr_t, u_int, void *);
> void m_adj(struct mbuf *, int);
> int m_copyback(struct mbuf *, int, int, const void *, int);
> Index: kern/uipc_mbuf.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
> retrieving revision 1.238
> diff -u -p -r1.238 uipc_mbuf.c
> --- kern/uipc_mbuf.c 9 Nov 2016 08:55:11 -0000 1.238
> +++ kern/uipc_mbuf.c 22 Nov 2016 02:31:47 -0000
> @@ -133,6 +133,19 @@ void m_extfree(struct mbuf *);
> void nmbclust_update(void);
> void m_zero(struct mbuf *);
>
> +struct mutex m_pool_mtx = MUTEX_INITIALIZER(IPL_NET);
> +unsigned int mbuf_mem_limit; /* how much memory can be allocated */
> +unsigned int mbuf_mem_alloc; /* how much memory has been allocated */
> +
> +void *m_pool_alloc(struct pool *, int, int *);
> +void m_pool_free(struct pool *, void *);
> +
> +struct pool_allocator m_pool_allocator = {
> + m_pool_alloc,
> + m_pool_free,
> + 0 /* will be copied from pool_allocator_multi */
> +};
> +
> static void (*mextfree_fns[4])(caddr_t, u_int, void *);
> static u_int num_extfree_fns;
>
> @@ -148,6 +161,11 @@ mbinit(void)
> int i;
> unsigned int lowbits;
>
> + m_pool_allocator.pa_pagesz = pool_allocator_multi.pa_pagesz;
> +
> + nmbclust_update();
> + mbuf_mem_alloc = 0;
> +
> #if DIAGNOSTIC
> if (mclsizes[0] != MCLBYTES)
> panic("mbinit: the smallest cluster size != MCLBYTES");
> @@ -155,9 +173,7 @@ mbinit(void)
> panic("mbinit: the largest cluster size != MAXMCLBYTES");
> #endif
>
> - pool_init(&mbpool, MSIZE, 0, IPL_NET, 0, "mbufpl", NULL);
> - pool_set_constraints(&mbpool, &kp_dma_contig);
> - pool_setlowat(&mbpool, mblowat);
> + m_pool_init(&mbpool, MSIZE, 64, "mbufpl");
>
> pool_init(&mtagpool, PACKET_TAG_MAXSIZE + sizeof(struct m_tag), 0,
> IPL_NET, 0, "mtagpl", NULL);
> @@ -171,47 +187,32 @@ mbinit(void)
> snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
> mclsizes[i] >> 10);
> }
> - pool_init(&mclpools[i], mclsizes[i], 64, IPL_NET, 0,
> - mclnames[i], NULL);
> - pool_set_constraints(&mclpools[i], &kp_dma_contig);
> - pool_setlowat(&mclpools[i], mcllowat);
> +
> + m_pool_init(&mclpools[i], mclsizes[i], 64, mclnames[i]);
> }
>
> (void)mextfree_register(m_extfree_pool);
> KASSERT(num_extfree_fns == 1);
> -
> - nmbclust_update();
> }
>
> void
> mbcpuinit()
> {
> + int i;
> +
> mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF);
> +
> + pool_cache_init(&mbpool);
> + pool_cache_init(&mtagpool);
> +
> + for (i = 0; i < nitems(mclsizes); i++)
> + pool_cache_init(&mclpools[i]);
> }
>
> void
> nmbclust_update(void)
> {
> - unsigned int i, n;
> -
> - /*
> - * Set the hard limit on the mclpools to the number of
> - * mbuf clusters the kernel is to support. Log the limit
> - * reached message max once a minute.
> - */
> - for (i = 0; i < nitems(mclsizes); i++) {
> - n = (unsigned long long)nmbclust * MCLBYTES / mclsizes[i];
> - (void)pool_sethardlimit(&mclpools[i], n, mclpool_warnmsg, 60);
> - /*
> - * XXX this needs to be reconsidered.
> - * Setting the high water mark to nmbclust is too high
> - * but we need to have enough spare buffers around so that
> - * allocations in interrupt context don't fail or mclgeti()
> - * drivers may end up with empty rings.
> - */
> - pool_sethiwat(&mclpools[i], n);
> - }
> - pool_sethiwat(&mbpool, nmbclust);
> + mbuf_mem_limit = nmbclust * MCLBYTES;
> }
>
> /*
> @@ -1377,6 +1378,52 @@ m_dup_pkt(struct mbuf *m0, unsigned int
> fail:
> m_freem(m);
> return (NULL);
> +}
> +
> +void *
> +m_pool_alloc(struct pool *pp, int flags, int *slowdown)
> +{
> + void *v = NULL;
> + int avail = 1;
> +
> + if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit)
> + return (NULL);
> +
> + mtx_enter(&m_pool_mtx);
> + if (mbuf_mem_alloc + pp->pr_pgsize > mbuf_mem_limit)
> + avail = 0;
> + else
> + mbuf_mem_alloc += pp->pr_pgsize;
> + mtx_leave(&m_pool_mtx);
> +
> + if (avail) {
> + v = (*pool_allocator_multi.pa_alloc)(pp, flags, slowdown);
> +
> + if (v == NULL) {
> + mtx_enter(&m_pool_mtx);
> + mbuf_mem_alloc -= pp->pr_pgsize;
> + mtx_leave(&m_pool_mtx);
> + }
> + }
> +
> + return (v);
> +}
> +
> +void
> +m_pool_free(struct pool *pp, void *v)
> +{
> + (*pool_allocator_multi.pa_free)(pp, v);
> +
> + mtx_enter(&m_pool_mtx);
> + mbuf_mem_alloc -= pp->pr_pgsize;
> + mtx_leave(&m_pool_mtx);
> +}
> +
> +void
> +m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
> +{
> + pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
> + pool_set_constraints(pp, &kp_dma_contig);
> }
>
> #ifdef DDB
> Index: dev/pci/if_myx.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
> retrieving revision 1.99
> diff -u -p -r1.99 if_myx.c
> --- dev/pci/if_myx.c 31 Oct 2016 01:38:57 -0000 1.99
> +++ dev/pci/if_myx.c 22 Nov 2016 02:31:47 -0000
> @@ -294,8 +294,6 @@ myx_attach(struct device *parent, struct
>
> /* this is sort of racy */
> if (myx_mcl_pool == NULL) {
> - extern struct kmem_pa_mode kp_dma_contig;
> -
> myx_mcl_pool = malloc(sizeof(*myx_mcl_pool), M_DEVBUF,
> M_WAITOK);
> if (myx_mcl_pool == NULL) {
> @@ -303,9 +301,9 @@ myx_attach(struct device *parent, struct
> DEVNAME(sc));
> goto unmap;
> }
> - pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY, IPL_NET,
> - 0, "myxmcl", NULL);
> - pool_set_constraints(myx_mcl_pool, &kp_dma_contig);
> +
> + m_pool_init(myx_mcl_pool, MYX_RXBIG_SIZE, MYX_BOUNDARY,
> + "myxmcl");
> }
>
> if (myx_pcie_dc(sc, pa) != 0)
>
>